From 56683515c7531d3c364a7b2b1d287e2e2be60425 Mon Sep 17 00:00:00 2001 From: folhesgabriel Date: Thu, 21 Sep 2023 07:40:14 -0300 Subject: [PATCH] atualiza tasks --- .../flows.py | 7 ++-- .../tasks.py | 42 ++----------------- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py index d5b17c552..06aed6696 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py @@ -17,7 +17,7 @@ from pipelines.datasets.br_cvm_oferta_publica_distribuicao.tasks import ( crawl, clean_table_oferta_distribuicao, - extract_last_date, + get_today_date, ) from pipelines.utils.decorators import Flow from pipelines.utils.tasks import ( @@ -92,9 +92,8 @@ ) with case(update_metadata, True): - data = extract_last_date( - dataset_id, table_id, "basedosdados", var_name="data_abertura_processo" - ) + data = get_today_date() + update_django_metadata( dataset_id, table_id, diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py index c2ad29cab..74792b2c0 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py @@ -71,43 +71,7 @@ def clean_table_oferta_distribuicao(root: str) -> str: @task -def extract_last_date( - dataset_id: str, - table_id: str, - billing_project_id: str, - var_name: str, -) -> str: - """ - Extracts the last update date of a given dataset table. - - Args: - dataset_id (str): The ID of the dataset. - table_id (str): The ID of the table. - billing_project_id (str): The billing project ID. - - Returns: - str: The last update date in the format 'yyyy-mm-dd'. - - Raises: - Exception: If an error occurs while extracting the last update date. - """ - log(f"Extracting last date from {dataset_id}.{table_id}") - query_bd = f""" - SELECT MAX({var_name}) as max_date - FROM - `{billing_project_id}.{dataset_id}.{table_id}` - """ - log(f"Query: {query_bd}") - - t = bd.read_sql( - query=query_bd, - billing_project_id=billing_project_id, - from_file=True, - ) - log(f"{t}") - - data = t["max_date"][0] - - log(f"A data mais recente da tabela é: {data}") +def get_today_date() -> str: + d = datetime.today() - return str(data) + return str(d.strftime("%Y-%m-%d"))