From d4c41543bd55e957f050e5abd1f226ede3165e41 Mon Sep 17 00:00:00 2001 From: uiro-bi Date: Thu, 14 Nov 2024 10:34:31 -0300 Subject: [PATCH 1/3] BugFix: redeploy cvm --- pipelines/datasets/br_cvm_fi/tasks.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index fb15d7125..54efbaab8 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -32,7 +32,10 @@ from pipelines.utils.utils import log, to_partitions -@task # noqa +@task( + max_retries=2, + retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value), +) # noqa def download_unzip_csv( url: str, files, chunk_size: int = 128, mkdir: bool = True, id="teste" ) -> str: @@ -114,7 +117,10 @@ def download_unzip_csv( return f"/tmp/data/br_cvm_fi/{id}/input/" -@task +@task( + max_retries=2, + retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value), +) def extract_links_and_dates(url) -> Tuple[pd.DataFrame, str]: """ Extracts all file names and their respective last update dates in a pandas dataframe. @@ -169,7 +175,10 @@ def extract_links_and_dates(url) -> Tuple[pd.DataFrame, str]: return df, data_maxima -@task +@task( + max_retries=2, + retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value), +) def generate_links_to_download(df: pd.DataFrame, max_date: datetime) -> list[str]: """ Checks for outdated tables. @@ -182,7 +191,10 @@ def generate_links_to_download(df: pd.DataFrame, max_date: datetime) -> list[str return lists -@task +@task( + max_retries=2, + retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value), +) def check_for_updates(df: pd.DataFrame): """ Checks for outdated tables. 
@@ -193,7 +205,10 @@ def check_for_updates(df: pd.DataFrame): -@task +@task( + max_retries=2, + retry_delay=timedelta(seconds=constants.TASK_RETRY_DELAY.value), +) def check_for_updates_ext(df): """ Checks for outdated tables in documentos_extratos_informacoes table. From 02d5b7d948672014289a259c34a8bfb7238b5a9c Mon Sep 17 00:00:00 2001 From: uiro-bi Date: Thu, 14 Nov 2024 10:58:56 -0300 Subject: [PATCH 2/3] fix tasks imports --- pipelines/datasets/br_cvm_fi/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index 54efbaab8..a0a501f6e 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -7,7 +7,7 @@ import os import re import zipfile -from datetime import datetime +from datetime import datetime, timedelta from typing import Tuple import pandas as pd @@ -30,7 +30,7 @@ sheet_to_df, ) from pipelines.utils.utils import log, to_partitions - +from pipelines.utils.constants import constants @task( max_retries=2, From 5764ff5d56d9e863cff58476ce30b645f7df249d Mon Sep 17 00:00:00 2001 From: uiro-bi Date: Thu, 14 Nov 2024 11:12:31 -0300 Subject: [PATCH 3/3] fix tasks imports --- pipelines/datasets/br_cvm_fi/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index a0a501f6e..14f9109d2 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -30,7 +30,7 @@ sheet_to_df, ) from pipelines.utils.utils import log, to_partitions -from pipelines.utils.constants import constants +from pipelines.constants import constants @task( max_retries=2,