diff --git a/pipelines/datasets/br_cvm_administradores_carteira/tasks.py b/pipelines/datasets/br_cvm_administradores_carteira/tasks.py index f6268bf0f..8128d25b8 100644 --- a/pipelines/datasets/br_cvm_administradores_carteira/tasks.py +++ b/pipelines/datasets/br_cvm_administradores_carteira/tasks.py @@ -161,7 +161,7 @@ def extract_last_date( table_id: str, billing_project_id: str, var_name: str, -) -> datetime: +) -> str: """ Extracts the last update date of a given dataset table. diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py index 011d164f8..d5b17c552 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py @@ -44,7 +44,7 @@ materialize_after_dump = Parameter( "materialize after dump", default=True, required=False ) - dbt_alias = Parameter("dbt_alias", default=False, required=False) + dbt_alias = Parameter("dbt_alias", default=True, required=False) update_metadata = Parameter("update_metadata", default=False, required=False) rename_flow_run = rename_current_flow_run_dataset_table( diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py index e77a601ac..c2ad29cab 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py @@ -4,7 +4,6 @@ """ import os - import pandas as pd from pandas.api.types import is_string_dtype from prefect import task @@ -77,7 +76,7 @@ def extract_last_date( table_id: str, billing_project_id: str, var_name: str, -) -> datetime: +) -> str: """ Extracts the last update date of a given dataset table. @@ -92,18 +91,20 @@ def extract_last_date( Raises: Exception: If an error occurs while extracting the last update date. """ - + log(f"Extracting last date from {dataset_id}.{table_id}") query_bd = f""" SELECT MAX({var_name}) as max_date FROM `{billing_project_id}.{dataset_id}.{table_id}` """ + log(f"Query: {query_bd}") t = bd.read_sql( query=query_bd, billing_project_id=billing_project_id, from_file=True, ) + log(f"{t}") data = t["max_date"][0]