From 55b389fbcd10995ddd7346eedf7b4d25a55834cc Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Mon, 18 Nov 2024 11:59:58 +0100 Subject: [PATCH 1/7] fix export pivoted csv --- superset/charts/post_processing.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 4c5abd8db19f1..4e944708454b3 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -89,6 +89,12 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s # pivot data; we'll compute totals and subtotals later if rows or columns: + # Split the first column using ";" and expand it into multiple columns + # Backup the original column names to reassign them later + columns_backup = df.columns[0].split(";") + df = df.iloc[:,0].str.split(';', expand=True) + df.columns = columns_backup + df = df.pivot_table( index=rows, columns=columns, From 5732122ca52f79a206fbba05a8bff8f26e47ebfd Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Mon, 18 Nov 2024 14:23:36 +0100 Subject: [PATCH 2/7] fix csv export updating pd.read_csv arguments --- superset/charts/post_processing.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 4e944708454b3..8785b92202761 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -26,6 +26,7 @@ for these chart types. 
""" +import superset_config from io import StringIO from typing import Any, Optional, TYPE_CHECKING, Union @@ -89,12 +90,6 @@ def pivot_df( # pylint: disable=too-many-locals, too-many-arguments, too-many-s # pivot data; we'll compute totals and subtotals later if rows or columns: - # Split the first column using ";" and expand it into multiple columns - # Backup the original column names to reassign them later - columns_backup = df.columns[0].split(";") - df = df.iloc[:,0].str.split(';', expand=True) - df.columns = columns_backup - df = df.pivot_table( index=rows, columns=columns, @@ -333,8 +328,11 @@ def apply_post_process( if query["result_format"] == ChartDataResultFormat.JSON: df = pd.DataFrame.from_dict(data) elif query["result_format"] == ChartDataResultFormat.CSV: - df = pd.read_csv(StringIO(data)) - + df = pd.read_csv(StringIO(data), + delimiter=superset_config.CSV_EXPORT.get('sep'), + encoding=superset_config.CSV_EXPORT.get('encoding'), + decimal=superset_config.CSV_EXPORT.get('decimal')) + # convert all columns to verbose (label) name if datasource: df.rename(columns=datasource.data["verbose_map"], inplace=True) From c4b416e4f947e64d4a17b4337ceee66e6dfb6b78 Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Tue, 19 Nov 2024 12:21:04 +0100 Subject: [PATCH 3/7] added fix to export pivoted table into .csv without flattening of multiindex rows and columns --- superset/charts/post_processing.py | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 8785b92202761..211a8ea0005b8 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -343,25 +343,25 @@ def apply_post_process( query["indexnames"] = list(processed_df.index) query["coltypes"] = extract_dataframe_dtypes(processed_df, datasource) query["rowcount"] = len(processed_df.index) - - # Flatten hierarchical columns/index since they are represented 
as - # `Tuple[str]`. Otherwise encoding to JSON later will fail because - # maps cannot have tuples as their keys in JSON. - processed_df.columns = [ - " ".join(str(name) for name in column).strip() - if isinstance(column, tuple) - else column - for column in processed_df.columns - ] - processed_df.index = [ - " ".join(str(name) for name in index).strip() - if isinstance(index, tuple) - else index - for index in processed_df.index - ] - + if query["result_format"] == ChartDataResultFormat.JSON: + # Flatten hierarchical columns/index since they are represented as + # `Tuple[str]`. Otherwise encoding to JSON later will fail because + # maps cannot have tuples as their keys in JSON. + processed_df.columns = [ + " ".join(str(name) for name in column).strip() + if isinstance(column, tuple) + else column + for column in processed_df.columns + ] + processed_df.index = [ + " ".join(str(name) for name in index).strip() + if isinstance(index, tuple) + else index + for index in processed_df.index + ] query["data"] = processed_df.to_dict() + elif query["result_format"] == ChartDataResultFormat.CSV: buf = StringIO() processed_df.to_csv(buf) From 86521a4de5e2550f3ed130852b6e8d8dc012310e Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Wed, 20 Nov 2024 15:52:21 +0100 Subject: [PATCH 4/7] fixed dependency to import CSV_EXPORT settings, added these settings in to_csv --- superset/charts/post_processing.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 211a8ea0005b8..e384d34e895eb 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -26,7 +26,7 @@ for these chart types. 
""" -import superset_config +from superset import app from io import StringIO from typing import Any, Optional, TYPE_CHECKING, Union @@ -45,6 +45,7 @@ from superset.connectors.sqla.models import BaseDatasource from superset.models.sql_lab import Query +csv_export_settings = app.config.get('CSV_EXPORT') def get_column_key(label: tuple[str, ...], metrics: list[str]) -> tuple[Any, ...]: """ @@ -329,9 +330,9 @@ def apply_post_process( df = pd.DataFrame.from_dict(data) elif query["result_format"] == ChartDataResultFormat.CSV: df = pd.read_csv(StringIO(data), - delimiter=superset_config.CSV_EXPORT.get('sep'), - encoding=superset_config.CSV_EXPORT.get('encoding'), - decimal=superset_config.CSV_EXPORT.get('decimal')) + sep=csv_export_settings.get('sep', ','), + encoding=csv_export_settings.get('encoding', 'utf-8'), + decimal=csv_export_settings.get('decimal', '.')) # convert all columns to verbose (label) name if datasource: @@ -364,8 +365,11 @@ def apply_post_process( elif query["result_format"] == ChartDataResultFormat.CSV: buf = StringIO() - processed_df.to_csv(buf) + processed_df.to_csv(buf, + sep=csv_export_settings.get('sep', ','), + encoding=csv_export_settings.get('encoding', 'utf-8'), + decimal=csv_export_settings.get('decimal', '.')) buf.seek(0) query["data"] = buf.getvalue() - return result + return result \ No newline at end of file From 8640f72dbd0cd4fe38f965bfbe4378f67518b787 Mon Sep 17 00:00:00 2001 From: Evan Rusackas Date: Mon, 25 Nov 2024 14:11:51 -0700 Subject: [PATCH 5/7] Fixing pre-commit issues --- superset/charts/post_processing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index e384d34e895eb..d07e7f51284f4 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -329,9 +329,9 @@ def apply_post_process( if query["result_format"] == ChartDataResultFormat.JSON: df = pd.DataFrame.from_dict(data) elif 
query["result_format"] == ChartDataResultFormat.CSV: - df = pd.read_csv(StringIO(data), - sep=csv_export_settings.get('sep', ','), - encoding=csv_export_settings.get('encoding', 'utf-8'), + df = pd.read_csv(StringIO(data), + sep=csv_export_settings.get('sep', ','), + encoding=csv_export_settings.get('encoding', 'utf-8'), decimal=csv_export_settings.get('decimal', '.')) # convert all columns to verbose (label) name @@ -365,11 +365,11 @@ def apply_post_process( elif query["result_format"] == ChartDataResultFormat.CSV: buf = StringIO() - processed_df.to_csv(buf, - sep=csv_export_settings.get('sep', ','), - encoding=csv_export_settings.get('encoding', 'utf-8'), + processed_df.to_csv(buf, + sep=csv_export_settings.get('sep', ','), + encoding=csv_export_settings.get('encoding', 'utf-8'), decimal=csv_export_settings.get('decimal', '.')) buf.seek(0) query["data"] = buf.getvalue() - return result \ No newline at end of file + return result From 0db365fd5fccf00415f36a17df51a44f119a4e33 Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Thu, 5 Dec 2024 16:25:56 +0100 Subject: [PATCH 6/7] merge updated with fix --- superset/charts/post_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index d07e7f51284f4..b3a10473bfd2a 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -26,7 +26,7 @@ for these chart types. 
""" -from superset import app +from flask import current_app from io import StringIO from typing import Any, Optional, TYPE_CHECKING, Union @@ -45,7 +45,7 @@ from superset.connectors.sqla.models import BaseDatasource from superset.models.sql_lab import Query -csv_export_settings = app.config.get('CSV_EXPORT') +csv_export_settings = current_app.config.get('CSV_EXPORT') def get_column_key(label: tuple[str, ...], metrics: list[str]) -> tuple[Any, ...]: """ From 947488b2b50bd1303451a040d6ccb3e294997613 Mon Sep 17 00:00:00 2001 From: Francesco Lombardo Date: Tue, 10 Dec 2024 09:04:43 +0100 Subject: [PATCH 7/7] formatted using Ruff --- superset/charts/post_processing.py | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index b3a10473bfd2a..3d09947fbec89 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -26,12 +26,12 @@ for these chart types. 
""" -from flask import current_app from io import StringIO from typing import Any, Optional, TYPE_CHECKING, Union import numpy as np import pandas as pd +from flask import current_app from flask_babel import gettext as __ from superset.common.chart_data import ChartDataResultFormat @@ -45,7 +45,8 @@ from superset.connectors.sqla.models import BaseDatasource from superset.models.sql_lab import Query -csv_export_settings = current_app.config.get('CSV_EXPORT') +csv_export_settings = current_app.config.get("CSV_EXPORT") + def get_column_key(label: tuple[str, ...], metrics: list[str]) -> tuple[Any, ...]: """ @@ -329,11 +330,13 @@ def apply_post_process( if query["result_format"] == ChartDataResultFormat.JSON: df = pd.DataFrame.from_dict(data) elif query["result_format"] == ChartDataResultFormat.CSV: - df = pd.read_csv(StringIO(data), - sep=csv_export_settings.get('sep', ','), - encoding=csv_export_settings.get('encoding', 'utf-8'), - decimal=csv_export_settings.get('decimal', '.')) - + df = pd.read_csv( + StringIO(data), + sep=csv_export_settings.get("sep", ","), + encoding=csv_export_settings.get("encoding", "utf-8"), + decimal=csv_export_settings.get("decimal", "."), + ) + # convert all columns to verbose (label) name if datasource: df.rename(columns=datasource.data["verbose_map"], inplace=True) @@ -344,7 +347,7 @@ def apply_post_process( query["indexnames"] = list(processed_df.index) query["coltypes"] = extract_dataframe_dtypes(processed_df, datasource) query["rowcount"] = len(processed_df.index) - + if query["result_format"] == ChartDataResultFormat.JSON: # Flatten hierarchical columns/index since they are represented as # `Tuple[str]`. 
Otherwise encoding to JSON later will fail because @@ -362,13 +365,15 @@ def apply_post_process( for index in processed_df.index ] query["data"] = processed_df.to_dict() - + elif query["result_format"] == ChartDataResultFormat.CSV: buf = StringIO() - processed_df.to_csv(buf, - sep=csv_export_settings.get('sep', ','), - encoding=csv_export_settings.get('encoding', 'utf-8'), - decimal=csv_export_settings.get('decimal', '.')) + processed_df.to_csv( + buf, + sep=csv_export_settings.get("sep", ","), + encoding=csv_export_settings.get("encoding", "utf-8"), + decimal=csv_export_settings.get("decimal", "."), + ) buf.seek(0) query["data"] = buf.getvalue()