diff --git a/pipelines/datasets/br_cgu_servidores_executivo_federal/flows.py b/pipelines/datasets/br_cgu_servidores_executivo_federal/flows.py index d66994223..6237de430 100644 --- a/pipelines/datasets/br_cgu_servidores_executivo_federal/flows.py +++ b/pipelines/datasets/br_cgu_servidores_executivo_federal/flows.py @@ -60,7 +60,7 @@ ) date_start = datetime.date(2013, 1, 1) - date_end = datetime.date(2013, 2, 1) + date_end = datetime.date(2013, 12, 1) log_task(f"Starting download, {date_start}, {date_end}") sheets_info = download_files(date_start=date_start, date_end=date_end) diff --git a/pipelines/datasets/br_cgu_servidores_executivo_federal/utils.py b/pipelines/datasets/br_cgu_servidores_executivo_federal/utils.py index 3e4e3803c..185bbdbf8 100644 --- a/pipelines/datasets/br_cgu_servidores_executivo_federal/utils.py +++ b/pipelines/datasets/br_cgu_servidores_executivo_federal/utils.py @@ -172,10 +172,17 @@ def read_and_clean_csv( else None ) + cols_with_float_type = df_architecture.loc[ + df_architecture["bigquery_type"] == "float64", "name" + ].to_list() + + for col in cols_with_float_type: + df[col] = df[col].str.replace(",", ".").astype(float) + df["ano"] = date.year df["mes"] = date.month - if "origem" in df.columns: + if "origem" in df_architecture["name"].to_list(): df["origem"] = get_source(table_name, source) return df