From beec4870ea402f1700029383ee92e4482869a114 Mon Sep 17 00:00:00 2001 From: tricktx Date: Sun, 15 Dec 2024 11:27:51 -0300 Subject: [PATCH] fix anatel time out --- pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py | 5 +++-- pipelines/utils/crawler_anatel/banda_larga_fixa/utils.py | 7 ++++--- pipelines/utils/crawler_anatel/telefonia_movel/flows.py | 3 ++- pipelines/utils/crawler_anatel/telefonia_movel/utils.py | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py b/pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py index a97fb40e6..8f32fa13a 100644 --- a/pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py +++ b/pipelines/utils/crawler_anatel/banda_larga_fixa/flows.py @@ -56,7 +56,7 @@ # https://discourse.prefect.io/t/my-parameter-value-shows-the-same-date-every-day-how-can-i-set-parameter-value-dynamically/99 ##### - new_ano = get_year_and_unzip(day=ano) + new_ano = get_year_and_unzip(day=ano, upstream_tasks=[rename_flow_run]) update_tables = get_max_date_in_table_microdados(ano=new_ano, table_id=table_id, upstream_tasks=[new_ano]) @@ -64,7 +64,8 @@ dataset_id = dataset_id, table_id = table_id, data_source_max_date = update_tables, - date_format = "%Y-%m") + date_format = "%Y-%m", + upstream_tasks=[update_tables]) with case(get_max_date, True): filepath = join_tables_in_function( diff --git a/pipelines/utils/crawler_anatel/banda_larga_fixa/utils.py b/pipelines/utils/crawler_anatel/banda_larga_fixa/utils.py index fa6c28112..62f86b2b0 100644 --- a/pipelines/utils/crawler_anatel/banda_larga_fixa/utils.py +++ b/pipelines/utils/crawler_anatel/banda_larga_fixa/utils.py @@ -55,18 +55,19 @@ def download_zip_file(path): driver.get(anatel_constants.URL.value) driver.maximize_window() - WebDriverWait(driver, 60).until( + WebDriverWait(driver, 300).until( EC.element_to_be_clickable( (By.XPATH, '/html/body/div/section/div/div[3]/div[2]/div[3]/div[2]/header/button') ) ).click() - WebDriverWait(driver, 60).until( + WebDriverWait(driver, 300).until( EC.element_to_be_clickable( (By.XPATH, '/html/body/div/section/div/div[3]/div[2]/div[3]/div[2]/div/div[1]/div[2]/div[2]/div/button') ) ).click() - time.sleep(300) + time.sleep(150) + log(os.listdir(path)) def unzip_file(): download_zip_file(path=anatel_constants.INPUT_PATH.value) diff --git a/pipelines/utils/crawler_anatel/telefonia_movel/flows.py b/pipelines/utils/crawler_anatel/telefonia_movel/flows.py index 2c0b6255e..f5d3c8ec1 100644 --- a/pipelines/utils/crawler_anatel/telefonia_movel/flows.py +++ b/pipelines/utils/crawler_anatel/telefonia_movel/flows.py @@ -59,7 +59,7 @@ # Function dynamic parameters # https://discourse.prefect.io/t/my-parameter-value-shows-the-same-date-every-day-how-can-i-set-parameter-value-dynamically/99 ##### - unzip_task = unzip() + unzip_task = unzip(upstream_tasks=[rename_flow_run]) new_year = get_year_full(ano, upstream_tasks=[unzip_task]) new_semester = get_semester(semestre, upstream_tasks=[new_year]) @@ -72,6 +72,7 @@ table_id = table_id, data_source_max_date = update_tables, date_format = "%Y-%m", + upstream_tasks=[update_tables] ) with case(get_max_date, True): diff --git a/pipelines/utils/crawler_anatel/telefonia_movel/utils.py b/pipelines/utils/crawler_anatel/telefonia_movel/utils.py index 1d44609fc..e83159cf3 100644 --- a/pipelines/utils/crawler_anatel/telefonia_movel/utils.py +++ b/pipelines/utils/crawler_anatel/telefonia_movel/utils.py @@ -50,12 +50,12 @@ def download_zip_file(path): driver = webdriver.Chrome(options=options) driver.get(anatel_constants.URL.value) driver.maximize_window() - WebDriverWait(driver, 60).until( + WebDriverWait(driver, 300).until( EC.element_to_be_clickable( (By.XPATH, '/html/body/div/section/div/div[3]/div[2]/div[3]/div[2]/header/button') ) ).click() - WebDriverWait(driver, 60).until( + WebDriverWait(driver, 300).until( EC.element_to_be_clickable( (By.XPATH, '/html/body/div/section/div/div[3]/div[2]/div[3]/div[2]/div/div[1]/div[2]/div[2]/div/button') )