From d7fa7edc01faf78d7fad6a3e817753a84c22d816 Mon Sep 17 00:00:00 2001 From: uiro-bi Date: Wed, 6 Nov 2024 16:37:33 -0300 Subject: [PATCH 1/2] fix: clean_data_and_make_partitions function --- pipelines/datasets/br_cvm_fi/tasks.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index b6b19ebb7..a01c14a05 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -225,10 +225,9 @@ def clean_data_and_make_partitions(path: str, table_id: str) -> str: for file in files: df = pd.read_csv(f"{path}{file}", sep=";") log(f"File {file} read.") + log(df.columns) - df.rename(columns={'CNPJ_FUNDO_CLASSE':'CNPJ_FUNDO'}, inplace=True) - - df["CNPJ_FUNDO"] = df["CNPJ_FUNDO"].str.replace(r"[/.-]", "") + df["CNPJ_FUNDO_CLASSE"] = df["CNPJ_FUNDO_CLASSE"].str.replace(r"[/.-]", "") df = rename_columns(df_arq, df) From 64afabcaf556b751e9ea0250fdda906b707a4829 Mon Sep 17 00:00:00 2001 From: uiro-bi Date: Wed, 6 Nov 2024 17:29:05 -0300 Subject: [PATCH 2/2] feat: drop columns in clean_data_and_make_partitions --- pipelines/datasets/br_cvm_fi/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index a01c14a05..fb15d7125 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -225,10 +225,11 @@ def clean_data_and_make_partitions(path: str, table_id: str) -> str: for file in files: df = pd.read_csv(f"{path}{file}", sep=";") log(f"File {file} read.") - log(df.columns) df["CNPJ_FUNDO_CLASSE"] = df["CNPJ_FUNDO_CLASSE"].str.replace(r"[/.-]", "") + df.drop("ID_SUBCLASSE",axis=1, inplace=True) + df = rename_columns(df_arq, df) df = check_and_create_column(