From ac5a004b836f23ea3959b04fa6aff6e65250e415 Mon Sep 17 00:00:00 2001
From: Vinicius
Date: Wed, 27 Sep 2023 12:03:10 -0300
Subject: [PATCH 1/2] feat: increase linters

---
 .pre-commit-config.yaml | 11 +++++++++++
 scripts/lint.py | 3 +++
 2 files changed, 14 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7e8763bee..b6b1e2aa6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,6 +8,15 @@ repos:
       - id: fix-encoding-pragma # fixes encoding pragma
       - id: no-commit-to-branch # prevents committing to protected branches
       - id: trailing-whitespace # prevents trailing whitespace
+  - repo: https://github.com/python-poetry/poetry
+    rev: 1.6.0
+    hooks:
+      - id: poetry-check
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: [--profile, black, --skip, "pipelines/{{cookiecutter.project_name}}"]
   - repo: https://github.com/psf/black
     rev: 23.9.1
     hooks:
@@ -17,7 +26,9 @@ repos:
     rev: v2.2.1
     hooks:
       - id: autoflake
+        exclude: 'pipelines\/\{\{cookiecutter\.project_name\}\}.*'
   - repo: https://github.com/PyCQA/flake8
     rev: 6.1.0
     hooks:
       - id: flake8
+        exclude: 'pipelines\/\{\{cookiecutter\.project_name\}\}.*'
diff --git a/scripts/lint.py b/scripts/lint.py
index 17af43134..eb60aa416 100644
--- a/scripts/lint.py
+++ b/scripts/lint.py
@@ -10,9 +10,12 @@ def run(*args):
 def main():
     """Lint all python files in the project"""
     code = 0
+    code |= run(["poetry", "check"])
     code |= run(
         [
             "isort",
+            "--profile",
+            "black",
             "--skip",
             "pipelines/{{cookiecutter.project_name}}",
             "--check-only",

From ec6beb9ca404048900d1e3e85c6c2c1e85388ede Mon Sep 17 00:00:00 2001
From: Vinicius
Date: Wed, 27 Sep 2023 12:11:29 -0300
Subject: [PATCH 2/2] feat: apply linters

---
 .../workflows/scripts/code_tree_analysis.py | 4 +-
 .github/workflows/scripts/register_flows.py | 25 ++------
 .../workflows/scripts/replace_docker_tag.py | 1 -
 manage.py | 2 +-
 pipelines/datasets/__init__.py | 60 +++++++++----------
 pipelines/datasets/botdosdados/flows.py | 13 ++--
 pipelines/datasets/botdosdados/schedules.py | 3 +-
 pipelines/datasets/botdosdados/tasks.py | 19 +++---
 .../br_anatel_banda_larga_fixa/flows.py | 24 ++++----
 .../br_anatel_banda_larga_fixa/schedules.py | 5 +-
 .../br_anatel_banda_larga_fixa/tasks.py | 15 ++---
 .../br_anatel_banda_larga_fixa/utils.py | 9 +--
 .../br_anatel_telefonia_movel/flows.py | 22 ++++---
 .../br_anatel_telefonia_movel/schedules.py | 6 +-
 .../br_anatel_telefonia_movel/tasks.py | 16 ++---
 .../br_anatel_telefonia_movel/utils.py | 9 +--
 .../br_anp_precos_combustiveis/flows.py | 31 +++++-----
 .../br_anp_precos_combustiveis/schedules.py | 2 +
 .../br_anp_precos_combustiveis/tasks.py | 28 +++++----
 .../br_anp_precos_combustiveis/utils.py | 12 ++--
 .../datasets/br_ans_beneficiario/flows.py | 26 ++++----
 .../datasets/br_ans_beneficiario/schedules.py | 5 +-
 .../datasets/br_ans_beneficiario/tasks.py | 17 +++---
 .../datasets/br_ans_beneficiario/utils.py | 18 +++---
 pipelines/datasets/br_b3_cotacoes/flows.py | 21 +++----
 .../datasets/br_b3_cotacoes/schedules.py | 2 +
 pipelines/datasets/br_b3_cotacoes/tasks.py | 10 ++--
 pipelines/datasets/br_b3_cotacoes/utils.py | 12 ++--
 pipelines/datasets/br_bcb_agencia/flows.py | 28 +++------
 .../datasets/br_bcb_agencia/schedules.py | 4 +-
 pipelines/datasets/br_bcb_agencia/tasks.py | 34 +++++------
 pipelines/datasets/br_bcb_agencia/utils.py | 20 +++----
 pipelines/datasets/br_bcb_estban/flows.py | 30 ++++------
 pipelines/datasets/br_bcb_estban/schedules.py | 5 +-
 pipelines/datasets/br_bcb_estban/tasks.py | 36 +++++------
 pipelines/datasets/br_bcb_estban/utils.py | 22 +++----
 .../datasets/br_bcb_taxa_cambio/flows.py | 20 ++++---
 .../datasets/br_bcb_taxa_cambio/schedules.py | 6 +-
 .../datasets/br_bcb_taxa_cambio/tasks.py | 5 +-
 .../datasets/br_bcb_taxa_cambio/utils.py | 14 +++--
 pipelines/datasets/br_bcb_taxa_selic/flows.py | 25 ++++----
 .../datasets/br_bcb_taxa_selic/schedules.py | 6 +-
 pipelines/datasets/br_bcb_taxa_selic/tasks.py | 1 +
 pipelines/datasets/br_bcb_taxa_selic/utils.py | 14 ++---
 pipelines/datasets/br_bd_indicadores/flows.py | 16 ++---
 .../datasets/br_bd_indicadores/schedules.py | 4 +-
 pipelines/datasets/br_bd_indicadores/tasks.py | 18 +++---
 pipelines/datasets/br_bd_indicadores/utils.py | 5 +-
 pipelines/datasets/br_bd_metadados/flows.py | 14 ++---
 .../datasets/br_bd_metadados/schedules.py | 2 +-
 pipelines/datasets/br_bd_metadados/tasks.py | 6 +-
 pipelines/datasets/br_bd_metadados/utils.py | 1 +
 .../br_cgu_pessoal_executivo_federal/flows.py | 24 ++++----
 .../schedules.py | 4 +-
 .../br_cgu_pessoal_executivo_federal/tasks.py | 2 +-
 .../br_cvm_administradores_carteira/flows.py | 11 ++--
 .../schedules.py | 2 +-
 .../br_cvm_administradores_carteira/tasks.py | 3 +-
 pipelines/datasets/br_cvm_fi/flows.py | 53 ++++++++--------
 pipelines/datasets/br_cvm_fi/schedules.py | 6 +-
 pipelines/datasets/br_cvm_fi/tasks.py | 31 +++++-----
 pipelines/datasets/br_cvm_fi/utils.py | 7 ++-
 .../flows.py | 14 ++---
 .../schedules.py | 4 +-
 .../tasks.py | 5 +-
 pipelines/datasets/br_fgv_igp/flows.py | 14 ++---
 pipelines/datasets/br_ibge_inpc/flows.py | 9 ++-
 pipelines/datasets/br_ibge_ipca/flows.py | 9 ++-
 pipelines/datasets/br_ibge_ipca15/flows.py | 9 ++-
 pipelines/datasets/br_ibge_pnadc/flows.py | 18 +++--
 pipelines/datasets/br_ibge_pnadc/tasks.py | 11 ++--
 pipelines/datasets/br_inmet_bdmep/flows.py | 18 +++--
 .../datasets/br_inmet_bdmep/schedules.py | 6 +-
 pipelines/datasets/br_inmet_bdmep/tasks.py | 23 ++++---
 pipelines/datasets/br_inmet_bdmep/utils.py | 9 +--
 pipelines/datasets/br_jota/flows.py | 3 +-
 pipelines/datasets/br_jota/schedules.py | 4 +-
 pipelines/datasets/br_me_caged/flows.py | 9 +--
 pipelines/datasets/br_me_caged/schedules.py | 4 +-
 pipelines/datasets/br_me_caged/tasks.py | 9 +--
 pipelines/datasets/br_me_cnpj/flows.py | 37 ++++++------
 pipelines/datasets/br_me_cnpj/schedules.py | 7 ++-
 pipelines/datasets/br_me_cnpj/tasks.py | 22 +++----
 pipelines/datasets/br_me_cnpj/utils.py | 20 +++----
 pipelines/datasets/br_me_comex_stat/flows.py | 24 ++++----
 .../datasets/br_me_comex_stat/schedules.py | 3 +-
 pipelines/datasets/br_me_comex_stat/tasks.py | 22 +++----
 pipelines/datasets/br_me_comex_stat/utils.py | 8 +--
 .../br_mercadolivre_ofertas/decorators.py | 3 +-
 .../datasets/br_mercadolivre_ofertas/flows.py | 28 ++++-----
 .../br_mercadolivre_ofertas/schedules.py | 1 -
 .../datasets/br_mercadolivre_ofertas/tasks.py | 10 ++--
 .../datasets/br_mercadolivre_ofertas/utils.py | 12 ++--
 .../br_mg_belohorizonte_smfa_iptu/flows.py | 20 +++---
 .../schedules.py | 4 +-
 .../br_mg_belohorizonte_smfa_iptu/tasks.py | 14 +++--
 .../br_mg_belohorizonte_smfa_iptu/utils.py | 10 ++--
 .../br_mp_pep_cargos_funcoes/flows.py | 28 ++++-----
 .../br_mp_pep_cargos_funcoes/schedules.py | 2 +
 .../br_mp_pep_cargos_funcoes/tasks.py | 18 +++---
 .../br_mp_pep_cargos_funcoes/utils.py | 1 +
 pipelines/datasets/br_ms_cnes/flows.py | 41 +++++++------
 pipelines/datasets/br_ms_cnes/schedules.py | 7 ++-
 pipelines/datasets/br_ms_cnes/tasks.py | 23 ++++---
 pipelines/datasets/br_ms_cnes/utils.py | 4 +-
 .../br_ons_avaliacao_operacao/flows.py | 25 ++++----
 .../br_ons_avaliacao_operacao/schedules.py | 5 +-
 .../br_ons_avaliacao_operacao/tasks.py | 18 +++---
 .../br_ons_avaliacao_operacao/utils.py | 13 ++--
 .../br_ons_estimativa_custos/flows.py | 23 +++----
 .../br_ons_estimativa_custos/schedules.py | 2 +-
 .../br_ons_estimativa_custos/tasks.py | 17 +++---
 .../br_ons_estimativa_custos/utils.py | 13 ++--
 .../datasets/br_poder360_pesquisas/flows.py | 12 ++--
 .../br_poder360_pesquisas/schedules.py | 1 -
 .../datasets/br_poder360_pesquisas/tasks.py | 9 +--
 pipelines/datasets/br_rf_cafir/flows.py | 12 ++--
 pipelines/datasets/br_rf_cafir/schedules.py | 7 ++-
 pipelines/datasets/br_rf_cafir/tasks.py | 15 ++---
 pipelines/datasets/br_rf_cafir/utils.py | 12 ++--
 .../br_rj_isp_estatisticas_seguranca/flows.py | 47 +++++++--------
 .../schedules.py | 4 +-
 .../br_rj_isp_estatisticas_seguranca/tasks.py | 17 +++---
 .../br_rj_isp_estatisticas_seguranca/utils.py | 6 +-
 .../br_sp_saopaulo_dieese_icv/flows.py | 14 ++---
 .../br_sp_saopaulo_dieese_icv/schedules.py | 6 +-
 .../br_sp_saopaulo_dieese_icv/tasks.py | 3 +-
 pipelines/datasets/br_tse_eleicoes/flows.py | 22 ++++---
 .../datasets/br_tse_eleicoes/schedules.py | 4 +-
 pipelines/datasets/br_tse_eleicoes/tasks.py | 22 +++----
 pipelines/datasets/cross_update/flows.py | 6 +-
 pipelines/datasets/cross_update/schedules.py | 2 +-
 pipelines/datasets/cross_update/tasks.py | 2 +-
 pipelines/datasets/delete_flows/flows.py | 2 +-
 pipelines/datasets/delete_flows/schedules.py | 5 +-
 pipelines/datasets/delete_flows/tasks.py | 4 +-
 pipelines/datasets/fundacao_lemann/flows.py | 4 +-
 .../datasets/fundacao_lemann/schedules.py | 6 +-
 .../constants.py | 2 +-
 .../decorators.py | 3 +-
 .../mundo_transfermarkt_competicoes/flows.py | 40 ++++++-------
 .../schedules.py | 5 +-
 .../mundo_transfermarkt_competicoes/tasks.py | 12 ++--
 .../mundo_transfermarkt_competicoes/utils.py | 8 ++-
 pipelines/datasets/test_pipeline/flows.py | 2 +-
 pipelines/datasets/test_pipeline/schedules.py | 4 +-
 pipelines/datasets/test_pipeline/tasks.py | 3 +-
 pipelines/utils/__init__.py | 4 +-
 .../apply_architecture_to_dataframe/utils.py | 2 +
 .../utils/crawler_ibge_inflacao/flows.py | 16 ++---
 .../utils/crawler_ibge_inflacao/utils.py | 4 +-
 pipelines/utils/custom.py | 1 -
 pipelines/utils/dump_to_gcs/tasks.py | 2 +-
 pipelines/utils/execute_dbt_model/flows.py | 6 +-
 pipelines/utils/execute_dbt_model/tasks.py | 4 +-
 pipelines/utils/execute_dbt_model/utils.py | 2 +-
 pipelines/utils/metadata/flows.py | 11 ++--
 pipelines/utils/metadata/tasks.py | 16 ++---
 pipelines/utils/metadata/utils.py | 5 +-
 pipelines/utils/tasks.py | 18 +++---
 .../utils/temporal_coverage_updater/flows.py | 7 +--
 .../utils/temporal_coverage_updater/tasks.py | 12 ++--
 .../utils/temporal_coverage_updater/utils.py | 10 ++--
 pipelines/utils/traceroute/tasks.py | 6 +-
 pipelines/utils/traceroute/utils.py | 1 -
 pipelines/utils/utils.py | 3 +-
 .../{{cookiecutter.project_name}}/flows.py | 4 +-
 .../schedules.py | 4 +-
 .../{{cookiecutter.workspace_name}}/flows.py | 6 +-
 .../schedules.py | 4 +-
 tests/test_igp.py | 1 -
 171 files changed, 1002 insertions(+), 1029 deletions(-)

diff --git a/.github/workflows/scripts/code_tree_analysis.py b/.github/workflows/scripts/code_tree_analysis.py
index 19cc52a35..a2f1e87d3 100644
--- a/.github/workflows/scripts/code_tree_analysis.py
+++ b/.github/workflows/scripts/code_tree_analysis.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 import ast
-from pathlib import Path
 import sys
+from pathlib import Path
 from typing import List, Tuple, Union

 import networkx as nx
-from prefect import Flow
import yaml +from prefect import Flow message_id = 0 diff --git a/.github/workflows/scripts/register_flows.py b/.github/workflows/scripts/register_flows.py index 3ee6a3a7a..52db88165 100644 --- a/.github/workflows/scripts/register_flows.py +++ b/.github/workflows/scripts/register_flows.py @@ -4,41 +4,28 @@ """ import ast -from collections import ( - Counter, - defaultdict, -) import glob import hashlib import json import os -from pathlib import Path import runpy import sys -from time import sleep import traceback -from typing import ( - Dict, - List, - Tuple, - Union, -) +from collections import Counter, defaultdict +from pathlib import Path +from time import sleep +from typing import Dict, List, Tuple, Union import box -from loguru import logger import prefect +from loguru import logger from prefect.run_configs import UniversalRun from prefect.storage import Local -from prefect.utilities.graphql import ( - compress, - EnumValue, - with_args, -) +from prefect.utilities.graphql import EnumValue, compress, with_args from typer import Typer import pipelines # DO NOT REMOVE THIS LINE - app = Typer() FlowLike = Union[box.Box, "prefect.Flow"] diff --git a/.github/workflows/scripts/replace_docker_tag.py b/.github/workflows/scripts/replace_docker_tag.py index 18c14f7e9..a340a5514 100644 --- a/.github/workflows/scripts/replace_docker_tag.py +++ b/.github/workflows/scripts/replace_docker_tag.py @@ -8,7 +8,6 @@ from sys import argv, exit from typing import List - FILE_PATH = Path("./pipelines/constants.py") REPLACE_TAG = "AUTO_REPLACE_DOCKER_TAG" REPLACE_IMAGE = "AUTO_REPLACE_DOCKER_IMAGE" diff --git a/manage.py b/manage.py index 08b1a6fd0..775bc1ce5 100644 --- a/manage.py +++ b/manage.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- +import pkgutil from os import path from pathlib import Path -import pkgutil from sys import exit from uuid import uuid4 diff --git a/pipelines/datasets/__init__.py b/pipelines/datasets/__init__.py index bb4e04cdf..c689a187e 100644 --- a/pipelines/datasets/__init__.py +++ b/pipelines/datasets/__init__.py @@ -6,44 +6,44 @@ # Automatically managed, please do not touch ############################################################################### +from pipelines.datasets.botdosdados.flows import * +from pipelines.datasets.br_anatel_banda_larga_fixa.flows import * +from pipelines.datasets.br_anatel_telefonia_movel.flows import * +from pipelines.datasets.br_anp_precos_combustiveis.flows import * +from pipelines.datasets.br_ans_beneficiario.flows import * +from pipelines.datasets.br_b3_cotacoes.flows import * +from pipelines.datasets.br_bcb_agencia.flows import * +from pipelines.datasets.br_bcb_estban.flows import * +from pipelines.datasets.br_bcb_taxa_cambio.flows import * +from pipelines.datasets.br_bcb_taxa_selic.flows import * +from pipelines.datasets.br_bd_indicadores.flows import * +from pipelines.datasets.br_bd_metadados.flows import * +from pipelines.datasets.br_cgu_pessoal_executivo_federal.flows import * from pipelines.datasets.br_cvm_administradores_carteira.flows import * +from pipelines.datasets.br_cvm_fi.flows import * from pipelines.datasets.br_cvm_oferta_publica_distribuicao.flows import * -from pipelines.datasets.br_me_comex_stat.flows import * +from pipelines.datasets.br_fgv_igp.flows import * from pipelines.datasets.br_ibge_inpc.flows import * -from pipelines.datasets.br_ibge_ipca.flows import * from pipelines.datasets.br_ibge_ipca15.flows import * -from pipelines.datasets.br_sp_saopaulo_dieese_icv.flows import * -from pipelines.datasets.br_bd_indicadores.flows 
import * -from pipelines.datasets.br_bd_metadados.flows import * -from pipelines.datasets.br_poder360_pesquisas.flows import * +from pipelines.datasets.br_ibge_ipca.flows import * +from pipelines.datasets.br_ibge_pnadc.flows import * from pipelines.datasets.br_inmet_bdmep.flows import * -from pipelines.datasets.botdosdados.flows import * -from pipelines.datasets.br_cgu_pessoal_executivo_federal.flows import * -from pipelines.datasets.fundacao_lemann.flows import * -from pipelines.datasets.br_tse_eleicoes.flows import * -from pipelines.datasets.delete_flows.flows import * from pipelines.datasets.br_jota.flows import * -from pipelines.datasets.br_fgv_igp.flows import * from pipelines.datasets.br_me_caged.flows import * -from pipelines.datasets.br_ibge_pnadc.flows import * -from pipelines.datasets.cross_update.flows import * -from pipelines.datasets.br_bcb_estban.flows import * +from pipelines.datasets.br_me_cnpj.flows import * +from pipelines.datasets.br_me_comex_stat.flows import * +from pipelines.datasets.br_mercadolivre_ofertas.flows import * +from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.flows import * +from pipelines.datasets.br_mp_pep_cargos_funcoes.flows import * from pipelines.datasets.br_ms_cnes.flows import * -from pipelines.datasets.br_rj_isp_estatisticas_seguranca.flows import * -from pipelines.datasets.br_anatel_banda_larga_fixa.flows import * -from pipelines.datasets.br_bcb_agencia.flows import * -from pipelines.datasets.br_cvm_fi.flows import * from pipelines.datasets.br_ons_avaliacao_operacao.flows import * from pipelines.datasets.br_ons_estimativa_custos.flows import * -from pipelines.datasets.br_b3_cotacoes.flows import * -from pipelines.datasets.br_anatel_telefonia_movel.flows import * -from pipelines.datasets.br_mercadolivre_ofertas.flows import * -from pipelines.datasets.br_bcb_taxa_cambio.flows import * -from pipelines.datasets.br_bcb_taxa_selic.flows import * -from pipelines.datasets.mundo_transfermarkt_competicoes.flows import * -from pipelines.datasets.br_me_cnpj.flows import * -from pipelines.datasets.br_mp_pep_cargos_funcoes.flows import * -from pipelines.datasets.br_ans_beneficiario.flows import * -from pipelines.datasets.br_anp_precos_combustiveis.flows import * -from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.flows import * +from pipelines.datasets.br_poder360_pesquisas.flows import * from pipelines.datasets.br_rf_cafir.flows import * +from pipelines.datasets.br_rj_isp_estatisticas_seguranca.flows import * +from pipelines.datasets.br_sp_saopaulo_dieese_icv.flows import * +from pipelines.datasets.br_tse_eleicoes.flows import * +from pipelines.datasets.cross_update.flows import * +from pipelines.datasets.delete_flows.flows import * +from pipelines.datasets.fundacao_lemann.flows import * +from pipelines.datasets.mundo_transfermarkt_competicoes.flows import * diff --git a/pipelines/datasets/botdosdados/flows.py b/pipelines/datasets/botdosdados/flows.py index e1fa41785..7baea4395 100644 --- a/pipelines/datasets/botdosdados/flows.py +++ b/pipelines/datasets/botdosdados/flows.py @@ -2,22 +2,23 @@ """ Flows for botdosdados """ -from prefect import case, Parameter +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS + from pipelines.constants import constants +from pipelines.datasets.botdosdados.schedules import every_day from pipelines.datasets.botdosdados.tasks import ( - was_table_updated, + echo, get_credentials, - message_last_tables, message_inflation_plot, + message_last_tables, 
send_media, send_thread, - echo, + was_table_updated, ) -from pipelines.datasets.botdosdados.schedules import every_day from pipelines.utils.decorators import Flow -from pipelines.utils.tasks import rename_current_flow_run, get_date_time_str +from pipelines.utils.tasks import get_date_time_str, rename_current_flow_run with Flow( name="botdosdados.message_inflation", code_owners=["lucas_cr"] diff --git a/pipelines/datasets/botdosdados/schedules.py b/pipelines/datasets/botdosdados/schedules.py index efc8433db..aed8c5c47 100755 --- a/pipelines/datasets/botdosdados/schedules.py +++ b/pipelines/datasets/botdosdados/schedules.py @@ -5,12 +5,11 @@ from datetime import datetime -from prefect.schedules import Schedule, filters, adjustments +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock from pipelines.constants import constants - every_day = Schedule( clocks=[ CronClock( diff --git a/pipelines/datasets/botdosdados/tasks.py b/pipelines/datasets/botdosdados/tasks.py index 0344941c1..8da7b179b 100644 --- a/pipelines/datasets/botdosdados/tasks.py +++ b/pipelines/datasets/botdosdados/tasks.py @@ -3,20 +3,21 @@ Tasks for botdosdados """ import os -from typing import Tuple -from datetime import timedelta, datetime from collections import defaultdict +from datetime import datetime, timedelta +from typing import Tuple -import tweepy -from tweepy.auth import OAuthHandler -from prefect import task -from basedosdados.download.metadata import _safe_fetch -import pandas as pd -import numpy as np import matplotlib.pyplot as plt +import numpy as np +import pandas as pd import seaborn as sns -from pipelines.utils.utils import log, get_storage_blobs, get_credentials_from_secret +import tweepy +from basedosdados.download.metadata import _safe_fetch +from prefect import task +from tweepy.auth import OAuthHandler + from pipelines.constants import constants +from pipelines.utils.utils import get_credentials_from_secret, get_storage_blobs, log # pylint: disable=C0103 diff --git a/pipelines/datasets/br_anatel_banda_larga_fixa/flows.py b/pipelines/datasets/br_anatel_banda_larga_fixa/flows.py index 83cff365b..c4b85f662 100644 --- a/pipelines/datasets/br_anatel_banda_larga_fixa/flows.py +++ b/pipelines/datasets/br_anatel_banda_larga_fixa/flows.py @@ -4,31 +4,31 @@ """ from datetime import timedelta + from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants -from pipelines.utils.tasks import update_django_metadata -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.datasets.br_anatel_banda_larga_fixa.schedules import ( + every_month_anatel_microdados, +) from pipelines.datasets.br_anatel_banda_larga_fixa.tasks import ( + get_today_date_atualizado, treatment, treatment_br, - treatment_uf, treatment_municipio, - get_today_date_atualizado, -) - -from pipelines.datasets.br_anatel_banda_larga_fixa.schedules import ( - every_month_anatel_microdados, + treatment_uf, ) - +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - 
rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_django_metadata, ) with Flow( diff --git a/pipelines/datasets/br_anatel_banda_larga_fixa/schedules.py b/pipelines/datasets/br_anatel_banda_larga_fixa/schedules.py index 586770201..9efa03057 100644 --- a/pipelines/datasets/br_anatel_banda_larga_fixa/schedules.py +++ b/pipelines/datasets/br_anatel_banda_larga_fixa/schedules.py @@ -4,9 +4,10 @@ """ from datetime import datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import CronClock -from prefect.schedules.clocks import IntervalClock +from prefect.schedules.clocks import CronClock, IntervalClock + from pipelines.constants import constants every_month_anatel_microdados = Schedule( diff --git a/pipelines/datasets/br_anatel_banda_larga_fixa/tasks.py b/pipelines/datasets/br_anatel_banda_larga_fixa/tasks.py index 655c53d35..42a9b9482 100644 --- a/pipelines/datasets/br_anatel_banda_larga_fixa/tasks.py +++ b/pipelines/datasets/br_anatel_banda_larga_fixa/tasks.py @@ -2,27 +2,24 @@ """ Tasks for br_anatel_banda_larga_fixa """ -import pandas as pd -import numpy as np import os - from datetime import datetime, timedelta -from dateutil.relativedelta import relativedelta +import numpy as np +import pandas as pd +from dateutil.relativedelta import relativedelta from prefect import task + +from pipelines.constants import constants from pipelines.datasets.br_anatel_banda_larga_fixa.constants import ( constants as anatel_constants, ) -from pipelines.utils.utils import ( - to_partitions, - log, -) from pipelines.datasets.br_anatel_banda_larga_fixa.utils import ( check_and_create_column, download_and_unzip, to_partitions_microdados, ) -from pipelines.constants import constants +from pipelines.utils.utils import log, to_partitions @task( diff --git a/pipelines/datasets/br_anatel_banda_larga_fixa/utils.py b/pipelines/datasets/br_anatel_banda_larga_fixa/utils.py index 2fc71e3f4..05f45f14b 100644 --- a/pipelines/datasets/br_anatel_banda_larga_fixa/utils.py +++ b/pipelines/datasets/br_anatel_banda_larga_fixa/utils.py @@ -2,13 +2,14 @@ """ General purpose functions for the br_anatel_banda_larga_fixa """ +import os from io import BytesIO -from zipfile import ZipFile +from pathlib import Path from urllib.request import urlopen -import os -import pandas as pd +from zipfile import ZipFile + import numpy as np -from pathlib import Path +import pandas as pd def download_and_unzip(url, path): diff --git a/pipelines/datasets/br_anatel_telefonia_movel/flows.py b/pipelines/datasets/br_anatel_telefonia_movel/flows.py index c2a8b75f1..072503bcc 100644 --- a/pipelines/datasets/br_anatel_telefonia_movel/flows.py +++ b/pipelines/datasets/br_anatel_telefonia_movel/flows.py @@ -3,35 +3,33 @@ Flows for dataset br_anatel_telefonia_movel """ -from prefect.run_configs import KubernetesRun -from prefect.storage import GCS from datetime import timedelta + from prefect import Parameter, case +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.tasks import update_django_metadata -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants from pipelines.datasets.br_anatel_telefonia_movel.constants import ( constants as anatel_constants, ) +from 
pipelines.datasets.br_anatel_telefonia_movel.schedules import every_month_anatel from pipelines.datasets.br_anatel_telefonia_movel.tasks import ( - clean_csv_microdados, clean_csv_brasil, - clean_csv_uf, + clean_csv_microdados, clean_csv_municipio, + clean_csv_uf, get_today_date_atualizado, ) +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, -) - -from pipelines.datasets.br_anatel_telefonia_movel.schedules import ( - every_month_anatel, + rename_current_flow_run_dataset_table, + update_django_metadata, ) with Flow(name="br_anatel_telefonia_movel", code_owners=["tricktx"]) as br_anatel: diff --git a/pipelines/datasets/br_anatel_telefonia_movel/schedules.py b/pipelines/datasets/br_anatel_telefonia_movel/schedules.py index 7bdaeeb7a..72a8412b8 100644 --- a/pipelines/datasets/br_anatel_telefonia_movel/schedules.py +++ b/pipelines/datasets/br_anatel_telefonia_movel/schedules.py @@ -3,11 +3,11 @@ Schedules for dataset br_anatel_telefonia_movel """ from datetime import datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock -from pipelines.constants import constants -from prefect.schedules.clocks import CronClock +from prefect.schedules.clocks import CronClock, IntervalClock +from pipelines.constants import constants every_month_anatel = Schedule( clocks=[ diff --git a/pipelines/datasets/br_anatel_telefonia_movel/tasks.py b/pipelines/datasets/br_anatel_telefonia_movel/tasks.py index a049b04f2..2436bb548 100644 --- a/pipelines/datasets/br_anatel_telefonia_movel/tasks.py +++ b/pipelines/datasets/br_anatel_telefonia_movel/tasks.py @@ -3,19 +3,21 @@ Tasks for dataset br_anatel_telefonia_movel """ -from prefect import task -import pandas as pd -import numpy as np -from datetime import datetime, timedelta import os +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd +from prefect import task + from pipelines.constants import constants +from pipelines.datasets.br_anatel_telefonia_movel.constants import ( + constants as anatel_constants, +) from pipelines.datasets.br_anatel_telefonia_movel.utils import ( download_and_unzip, to_partitions_microdados, ) -from pipelines.datasets.br_anatel_telefonia_movel.constants import ( - constants as anatel_constants, -) from pipelines.utils.utils import log diff --git a/pipelines/datasets/br_anatel_telefonia_movel/utils.py b/pipelines/datasets/br_anatel_telefonia_movel/utils.py index c1d7071d2..d4e7a1208 100644 --- a/pipelines/datasets/br_anatel_telefonia_movel/utils.py +++ b/pipelines/datasets/br_anatel_telefonia_movel/utils.py @@ -4,13 +4,14 @@ """ # pylint: disable=too-few-public-methods,invalid-name +import os from io import BytesIO -from zipfile import ZipFile +from pathlib import Path from urllib.request import urlopen -import os -import pandas as pd +from zipfile import ZipFile + import numpy as np -from pathlib import Path +import pandas as pd def download_and_unzip(url, path): diff --git a/pipelines/datasets/br_anp_precos_combustiveis/flows.py b/pipelines/datasets/br_anp_precos_combustiveis/flows.py index ec824870b..1fb522cd5 100644 --- a/pipelines/datasets/br_anp_precos_combustiveis/flows.py +++ b/pipelines/datasets/br_anp_precos_combustiveis/flows.py @@ -4,35 +4,36 @@ """ from datetime import timedelta 
+ from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants -from pipelines.utils.tasks import update_django_metadata -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.datasets.br_anp_precos_combustiveis.utils import download_files +from pipelines.datasets.br_anp_precos_combustiveis.constants import ( + constants as anatel_constants, +) +from pipelines.datasets.br_anp_precos_combustiveis.schedules import ( + every_week_anp_microdados, +) from pipelines.datasets.br_anp_precos_combustiveis.tasks import ( - download_and_transform, + check_for_updates, data_max_bd_mais, data_max_bd_pro, + download_and_transform, make_partitions, - check_for_updates, -) -from pipelines.datasets.br_anp_precos_combustiveis.schedules import ( - every_week_anp_microdados, ) +from pipelines.datasets.br_anp_precos_combustiveis.utils import download_files +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, log_task, -) - -from pipelines.datasets.br_anp_precos_combustiveis.constants import ( - constants as anatel_constants, + rename_current_flow_run_dataset_table, + update_django_metadata, ) with Flow( diff --git a/pipelines/datasets/br_anp_precos_combustiveis/schedules.py b/pipelines/datasets/br_anp_precos_combustiveis/schedules.py index b1891de93..2bf80253f 100644 --- a/pipelines/datasets/br_anp_precos_combustiveis/schedules.py +++ b/pipelines/datasets/br_anp_precos_combustiveis/schedules.py @@ -4,8 +4,10 @@ """ from datetime import datetime + from prefect.schedules import Schedule from prefect.schedules.clocks import CronClock + from pipelines.constants import constants every_week_anp_microdados = Schedule( diff --git a/pipelines/datasets/br_anp_precos_combustiveis/tasks.py b/pipelines/datasets/br_anp_precos_combustiveis/tasks.py index 48c911278..456731b94 100644 --- a/pipelines/datasets/br_anp_precos_combustiveis/tasks.py +++ b/pipelines/datasets/br_anp_precos_combustiveis/tasks.py @@ -3,28 +3,30 @@ Tasks for br_anp_precos_combustiveis """ -from prefect import task -import pandas as pd -import numpy as np from datetime import datetime, timedelta + +import numpy as np +import pandas as pd +from prefect import task + +from pipelines.constants import constants +from pipelines.datasets.br_anp_precos_combustiveis.constants import ( + constants as anatel_constants, +) from pipelines.datasets.br_anp_precos_combustiveis.utils import ( + creating_column_ano, download_files, get_id_municipio, - open_csvs, - partition_data, + lower_colunm_produto, merge_table_id_municipio, + open_csvs, orderning_data_coleta, - creating_column_ano, + partition_data, rename_and_reordening, - rename_columns, rename_and_to_create_endereco, - lower_colunm_produto, -) -from pipelines.datasets.br_anp_precos_combustiveis.constants import ( - constants as anatel_constants, + rename_columns, ) -from pipelines.utils.utils import log, extract_last_date -from pipelines.constants import constants +from pipelines.utils.utils import extract_last_date, 
log @task diff --git a/pipelines/datasets/br_anp_precos_combustiveis/utils.py b/pipelines/datasets/br_anp_precos_combustiveis/utils.py index 38cf5eef1..7a4b2a970 100644 --- a/pipelines/datasets/br_anp_precos_combustiveis/utils.py +++ b/pipelines/datasets/br_anp_precos_combustiveis/utils.py @@ -2,17 +2,19 @@ """ General purpose functions for the br_anp_precos_combustiveis project """ +import os +from datetime import datetime + import basedosdados as bd +import numpy as np import pandas as pd -import unidecode -import os import requests -from pipelines.utils.utils import log -from datetime import datetime +import unidecode + from pipelines.datasets.br_anp_precos_combustiveis.constants import ( constants as anatel_constants, ) -import numpy as np +from pipelines.utils.utils import log def download_files(urls, path): diff --git a/pipelines/datasets/br_ans_beneficiario/flows.py b/pipelines/datasets/br_ans_beneficiario/flows.py index 0b4b968fa..265112358 100644 --- a/pipelines/datasets/br_ans_beneficiario/flows.py +++ b/pipelines/datasets/br_ans_beneficiario/flows.py @@ -5,36 +5,32 @@ from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants +from pipelines.datasets.br_ans_beneficiario.schedules import every_day_ans from pipelines.datasets.br_ans_beneficiario.tasks import ( - extract_links_and_dates, check_for_updates, crawler_ans, - is_empty, + extract_links_and_dates, get_today_date, + is_empty, ) - -from pipelines.datasets.br_ans_beneficiario.schedules import every_day_ans -from pipelines.utils.decorators import Flow -from prefect import Parameter, case from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.tasks import ( +from pipelines.utils.metadata.flows import update_django_metadata +from pipelines.utils.tasks import ( # update_django_metadata, create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, log_task, - # update_django_metadata, -) -from pipelines.utils.metadata.flows import update_django_metadata -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, + rename_current_flow_run_dataset_table, ) - with Flow( name="br_ans_beneficiario.informacao_consolidada", code_owners=[ diff --git a/pipelines/datasets/br_ans_beneficiario/schedules.py b/pipelines/datasets/br_ans_beneficiario/schedules.py index e607a6702..8a3fc512a 100644 --- a/pipelines/datasets/br_ans_beneficiario/schedules.py +++ b/pipelines/datasets/br_ans_beneficiario/schedules.py @@ -5,11 +5,12 @@ from datetime import datetime + from prefect.schedules import Schedule -from pipelines.constants import constants from prefect.schedules.clocks import CronClock -from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants +from pipelines.constants import constants +from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants every_day_ans = Schedule( clocks=[ diff --git a/pipelines/datasets/br_ans_beneficiario/tasks.py b/pipelines/datasets/br_ans_beneficiario/tasks.py index 6149a93b4..8aee92ae5 100644 --- a/pipelines/datasets/br_ans_beneficiario/tasks.py +++ b/pipelines/datasets/br_ans_beneficiario/tasks.py @@ -2,25 +2,24 @@ """ Tasks for br_ans_beneficiario """ -from bs4 
import BeautifulSoup +import os import re -import pandas as pd from datetime import datetime + +import pandas as pd +import requests +from bs4 import BeautifulSoup from loguru import logger from prefect import task -import os from tqdm import tqdm -import requests + +from pipelines.datasets.br_ans_beneficiario.constants import constants as ans_constants from pipelines.datasets.br_ans_beneficiario.utils import ( download_unzip_csv, get_url_from_template, parquet_partition, ) -from pipelines.utils.utils import ( - log, - to_partitions, -) -from pipelines.datasets.br_ans_beneficiario.constants import constants as ans_constants +from pipelines.utils.utils import log, to_partitions @task diff --git a/pipelines/datasets/br_ans_beneficiario/utils.py b/pipelines/datasets/br_ans_beneficiario/utils.py index 37107da69..9ca2564d2 100644 --- a/pipelines/datasets/br_ans_beneficiario/utils.py +++ b/pipelines/datasets/br_ans_beneficiario/utils.py @@ -2,25 +2,21 @@ """ General purpose functions for the br_ans_beneficiario project """ +import os +import zipfile + import pandas as pd +import requests +import unidecode +from dateutil.relativedelta import relativedelta # from multiprocessing import Pool from loguru import logger - - -import os from tqdm import tqdm # import tempfile from pipelines.datasets.br_ans_beneficiario.constants import constants as ans_constants -import zipfile -import requests -from dateutil.relativedelta import relativedelta -from pipelines.utils.utils import ( - log, - to_partitions, -) -import unidecode +from pipelines.utils.utils import log, to_partitions def remove_accents(text): diff --git a/pipelines/datasets/br_b3_cotacoes/flows.py b/pipelines/datasets/br_b3_cotacoes/flows.py index d3ededa47..21068a654 100644 --- a/pipelines/datasets/br_b3_cotacoes/flows.py +++ b/pipelines/datasets/br_b3_cotacoes/flows.py @@ -4,32 +4,25 @@ """ from datetime import timedelta + from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.tasks import update_django_metadata + from pipelines.constants import constants +from pipelines.datasets.br_b3_cotacoes.schedules import all_day_cotacoes +from pipelines.datasets.br_b3_cotacoes.tasks import data_max_b3, tratamento from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.datasets.br_b3_cotacoes.tasks import ( - tratamento, - data_max_b3, -) -from pipelines.utils.utils import ( - log, -) - -from pipelines.datasets.br_b3_cotacoes.schedules import ( - all_day_cotacoes, -) - from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_django_metadata, ) +from pipelines.utils.utils import log with Flow(name="br_b3_cotacoes.cotacoes", code_owners=["trick"]) as cotacoes: # Parameters diff --git a/pipelines/datasets/br_b3_cotacoes/schedules.py b/pipelines/datasets/br_b3_cotacoes/schedules.py index c2e3a93db..42c9baeca 100644 --- a/pipelines/datasets/br_b3_cotacoes/schedules.py +++ b/pipelines/datasets/br_b3_cotacoes/schedules.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- from datetime import datetime + from prefect.schedules import Schedule from prefect.schedules.clocks import CronClock + from pipelines.constants import constants 
all_day_cotacoes = Schedule( diff --git a/pipelines/datasets/br_b3_cotacoes/tasks.py b/pipelines/datasets/br_b3_cotacoes/tasks.py index 176cb4f2a..9bd9bd92b 100644 --- a/pipelines/datasets/br_b3_cotacoes/tasks.py +++ b/pipelines/datasets/br_b3_cotacoes/tasks.py @@ -3,18 +3,20 @@ Tasks for br_b3_cotacoes """ -from prefect import task -import pandas as pd from datetime import datetime, timedelta + +import pandas as pd +from prefect import task + +from pipelines.constants import constants from pipelines.datasets.br_b3_cotacoes.constants import ( constants as br_b3_cotacoes_constants, ) -from pipelines.utils.utils import log -from pipelines.constants import constants from pipelines.datasets.br_b3_cotacoes.utils import ( download_chunk_and_unzip_csv, process_chunk_csv, ) +from pipelines.utils.utils import log @task( diff --git a/pipelines/datasets/br_b3_cotacoes/utils.py b/pipelines/datasets/br_b3_cotacoes/utils.py index 305fb9dcd..b6eb9df65 100644 --- a/pipelines/datasets/br_b3_cotacoes/utils.py +++ b/pipelines/datasets/br_b3_cotacoes/utils.py @@ -2,17 +2,19 @@ """ General purpose functions for the br_bcb_estban project """ -import requests -import pandas as pd -import numpy as np import os import zipfile -from tqdm import tqdm from datetime import datetime -from pipelines.utils.utils import log + +import numpy as np +import pandas as pd +import requests +from tqdm import tqdm + from pipelines.datasets.br_b3_cotacoes.constants import ( constants as br_b3_cotacoes_constants, ) +from pipelines.utils.utils import log # ------- macro etapa 1 download to zip by chunk and unzip csv diff --git a/pipelines/datasets/br_bcb_agencia/flows.py b/pipelines/datasets/br_bcb_agencia/flows.py index d0a19d686..67dfa704a 100644 --- a/pipelines/datasets/br_bcb_agencia/flows.py +++ b/pipelines/datasets/br_bcb_agencia/flows.py @@ -2,36 +2,24 @@ from datetime import timedelta from prefect import Parameter, case -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, -) - -from pipelines.datasets.br_bcb_agencia.schedules import ( - every_month_agencia, -) -from pipelines.datasets.br_bcb_agencia.tasks import ( - download_data, - clean_data, -) -from pipelines.datasets.br_bcb_agencia.constants import ( - constants as agencia_constants, -) - from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants -from pipelines.utils.decorators import Flow +from pipelines.datasets.br_bcb_agencia.constants import constants as agencia_constants +from pipelines.datasets.br_bcb_agencia.schedules import every_month_agencia +from pipelines.datasets.br_bcb_agencia.tasks import clean_data, download_data from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.metadata.flows import update_django_metadata +from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.flows import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) - with Flow( name="br_bcb_agencia.agencia", code_owners=[ diff --git a/pipelines/datasets/br_bcb_agencia/schedules.py b/pipelines/datasets/br_bcb_agencia/schedules.py index a07e10c5c..650e85733 100644 --- a/pipelines/datasets/br_bcb_agencia/schedules.py +++ 
b/pipelines/datasets/br_bcb_agencia/schedules.py @@ -4,8 +4,10 @@ """ from datetime import datetime -from prefect.schedules import Schedule, filters, adjustments + +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock + from pipelines.constants import constants every_month_agencia = Schedule( diff --git a/pipelines/datasets/br_bcb_agencia/tasks.py b/pipelines/datasets/br_bcb_agencia/tasks.py index dfb3c4cfb..6dcf1694f 100644 --- a/pipelines/datasets/br_bcb_agencia/tasks.py +++ b/pipelines/datasets/br_bcb_agencia/tasks.py @@ -5,38 +5,32 @@ import os -import pandas as pd from datetime import timedelta -from pipelines.datasets.br_bcb_agencia.constants import ( - constants as agencia_constants, -) +import pandas as pd +from prefect import task +from pipelines.constants import constants +from pipelines.datasets.br_bcb_agencia.constants import constants as agencia_constants from pipelines.datasets.br_bcb_agencia.utils import ( - extract_download_links, - download_and_unzip, - read_file, - clean_column_names, - rename_cols, check_and_create_column, + clean_column_names, clean_nome_municipio, + create_cnpj_col, + download_and_unzip, + extract_download_links, + format_date, get_data_from_prod, order_cols, - remove_non_numeric_chars, + read_file, remove_empty_spaces, - format_date, remove_latin1_accents_from_df, - strip_dataframe_columns, + remove_non_numeric_chars, + rename_cols, str_to_title, - create_cnpj_col, -) - -from prefect import task -from pipelines.utils.utils import ( - log, - to_partitions, + strip_dataframe_columns, ) -from pipelines.constants import constants +from pipelines.utils.utils import log, to_partitions @task( diff --git a/pipelines/datasets/br_bcb_agencia/utils.py b/pipelines/datasets/br_bcb_agencia/utils.py index 06bab6ba0..3eeb89ca9 100644 --- a/pipelines/datasets/br_bcb_agencia/utils.py +++ b/pipelines/datasets/br_bcb_agencia/utils.py @@ -4,20 +4,20 @@ """ -import requests -from lxml import html +import os +import re +import unicodedata from io import BytesIO -from zipfile import ZipFile from urllib.request import urlopen +from zipfile import ZipFile + import basedosdados as bd -import os -import pandas as pd -import re import numpy as np -import unicodedata -from pipelines.utils.utils import ( - log, -) +import pandas as pd +import requests +from lxml import html + +from pipelines.utils.utils import log # ---- functions to download data diff --git a/pipelines/datasets/br_bcb_estban/flows.py b/pipelines/datasets/br_bcb_estban/flows.py index 46aec245d..c519474f3 100644 --- a/pipelines/datasets/br_bcb_estban/flows.py +++ b/pipelines/datasets/br_bcb_estban/flows.py @@ -4,38 +4,34 @@ """ from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, -) +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.datasets.br_bcb_estban.tasks import ( - download_estban_files, - cleaning_municipios_data, - cleaning_agencias_data, - get_id_municipio, +from pipelines.datasets.br_bcb_estban.constants import ( + constants as br_bcb_estban_constants, ) - from pipelines.datasets.br_bcb_estban.schedules import ( every_month_agencia, every_month_municipio, ) - -from 
pipelines.datasets.br_bcb_estban.constants import ( - constants as br_bcb_estban_constants, +from pipelines.datasets.br_bcb_estban.tasks import ( + cleaning_agencias_data, + cleaning_municipios_data, + download_estban_files, + get_id_municipio, ) +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow -from pipelines.utils.metadata.flows import update_django_metadata from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.flows import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) with Flow( diff --git a/pipelines/datasets/br_bcb_estban/schedules.py b/pipelines/datasets/br_bcb_estban/schedules.py index c2a0355df..450ab758c 100644 --- a/pipelines/datasets/br_bcb_estban/schedules.py +++ b/pipelines/datasets/br_bcb_estban/schedules.py @@ -7,10 +7,11 @@ # and 90 days during december (...) from datetime import datetime -from prefect.schedules import Schedule, filters, adjustments + +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock -from pipelines.constants import constants +from pipelines.constants import constants every_month_agencia = Schedule( clocks=[ diff --git a/pipelines/datasets/br_bcb_estban/tasks.py b/pipelines/datasets/br_bcb_estban/tasks.py index f894a714c..87683c62e 100644 --- a/pipelines/datasets/br_bcb_estban/tasks.py +++ b/pipelines/datasets/br_bcb_estban/tasks.py @@ -3,41 +3,37 @@ Tasks for br_bcb_estban """ -from prefect import task -import pandas as pd import os +from datetime import timedelta + import basedosdados as bd +import pandas as pd +from prefect import task from pipelines.constants import constants from pipelines.datasets.br_bcb_estban.constants import ( constants as br_bcb_estban_constants, ) -from datetime import timedelta - -from pipelines.utils.utils import ( - clean_dataframe, - to_partitions, - log, -) from pipelines.datasets.br_bcb_estban.utils import * from pipelines.datasets.br_bcb_estban.utils import ( - extract_download_links, - download_and_unzip, - read_files, - rename_columns_municipio, + cols_order_agencia, create_id_municipio, - pre_cleaning_for_pivot_long_municipio, - wide_to_long_municipio, - order_cols_municipio, - standardize_monetary_units, create_id_verbete_column, create_month_year_columns, - rename_columns_agencia, + download_and_unzip, + extract_download_links, + get_data_from_prod, + order_cols_municipio, pre_cleaning_for_pivot_long_agencia, + pre_cleaning_for_pivot_long_municipio, + read_files, + rename_columns_agencia, + rename_columns_municipio, + standardize_monetary_units, wide_to_long_agencia, - cols_order_agencia, - get_data_from_prod, + wide_to_long_municipio, ) +from pipelines.utils.utils import clean_dataframe, log, to_partitions @task( diff --git a/pipelines/datasets/br_bcb_estban/utils.py b/pipelines/datasets/br_bcb_estban/utils.py index 1f3bc4f38..834629cde 100644 --- a/pipelines/datasets/br_bcb_estban/utils.py +++ b/pipelines/datasets/br_bcb_estban/utils.py @@ -2,20 +2,20 @@ """ General purpose functions for the br_bcb_estban project """ -import requests -from lxml import html -import basedosdados as bd +import os +import re +import unicodedata from io import BytesIO -from zipfile import ZipFile from urllib.request import urlopen -import pandas as pd -import unicodedata +from zipfile import ZipFile + 
+import basedosdados as bd import numpy as np -import re -import os -from pipelines.utils.utils import ( - log, -) +import pandas as pd +import requests +from lxml import html + +from pipelines.utils.utils import log # ------- macro etapa 1 download de dados diff --git a/pipelines/datasets/br_bcb_taxa_cambio/flows.py b/pipelines/datasets/br_bcb_taxa_cambio/flows.py index d11b84e03..fb005d261 100644 --- a/pipelines/datasets/br_bcb_taxa_cambio/flows.py +++ b/pipelines/datasets/br_bcb_taxa_cambio/flows.py @@ -4,28 +4,30 @@ """ from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants +from pipelines.datasets.br_bcb_taxa_cambio.constants import constants as constants_bcb +from pipelines.datasets.br_bcb_taxa_cambio.schedules import ( + schedule_every_weekday_taxa_cambio, +) from pipelines.datasets.br_bcb_taxa_cambio.tasks import ( get_data_taxa_cambio, treat_data_taxa_cambio, ) -from pipelines.utils.metadata.tasks import update_django_metadata +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow -from pipelines.datasets.br_bcb_taxa_cambio.constants import constants as constants_bcb -from prefect import Parameter, case -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, get_current_flow_labels, rename_current_flow_run_dataset_table, ) -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.datasets.br_bcb_taxa_cambio.schedules import ( - schedule_every_weekday_taxa_cambio, -) with Flow( name="br_bcb_taxa_cambio.taxa_cambio", diff --git a/pipelines/datasets/br_bcb_taxa_cambio/schedules.py b/pipelines/datasets/br_bcb_taxa_cambio/schedules.py index b11575d62..0ad5c7435 100644 --- a/pipelines/datasets/br_bcb_taxa_cambio/schedules.py +++ b/pipelines/datasets/br_bcb_taxa_cambio/schedules.py @@ -3,9 +3,11 @@ Schedules for br_bcb_indicadores """ -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock from datetime import datetime + +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock + from pipelines.constants import constants schedule_every_weekday_taxa_cambio = Schedule( diff --git a/pipelines/datasets/br_bcb_taxa_cambio/tasks.py b/pipelines/datasets/br_bcb_taxa_cambio/tasks.py index 1e745675c..30f7b75d3 100644 --- a/pipelines/datasets/br_bcb_taxa_cambio/tasks.py +++ b/pipelines/datasets/br_bcb_taxa_cambio/tasks.py @@ -3,9 +3,9 @@ Tasks for br_bcb_indicadores """ -from pipelines.utils.utils import log, to_partitions -from prefect import task import pandas as pd +from prefect import task + from pipelines.datasets.br_bcb_taxa_cambio.constants import constants as bcb_constants from pipelines.datasets.br_bcb_taxa_cambio.utils import ( available_currencies, @@ -14,6 +14,7 @@ save_input, treat_currency_df, ) +from pipelines.utils.utils import log, to_partitions @task diff --git a/pipelines/datasets/br_bcb_taxa_cambio/utils.py b/pipelines/datasets/br_bcb_taxa_cambio/utils.py index 
1a822e33d..aa15e166d 100644 --- a/pipelines/datasets/br_bcb_taxa_cambio/utils.py +++ b/pipelines/datasets/br_bcb_taxa_cambio/utils.py @@ -2,20 +2,22 @@ """ General purpose functions for the br_bcb_indicadores project """ +import datetime +import os +import time as tm +from io import BytesIO from urllib.request import urlopen from zipfile import ZipFile -import requests + import pandas as pd -import datetime import pytz -import os -from io import BytesIO -import time as tm +import requests + from pipelines.datasets.br_bcb_taxa_cambio.constants import constants as bcb_constants -from pipelines.utils.utils import log from pipelines.utils.apply_architecture_to_dataframe.utils import ( apply_architecture_to_dataframe, ) +from pipelines.utils.utils import log def available_currencies() -> dict: diff --git a/pipelines/datasets/br_bcb_taxa_selic/flows.py b/pipelines/datasets/br_bcb_taxa_selic/flows.py index 35ad58318..364af312c 100644 --- a/pipelines/datasets/br_bcb_taxa_selic/flows.py +++ b/pipelines/datasets/br_bcb_taxa_selic/flows.py @@ -4,31 +4,30 @@ """ from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.metadata.tasks import update_django_metadata -from pipelines.utils.decorators import Flow from pipelines.constants import constants -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.tasks import ( - create_table_and_upload_to_gcs, - get_current_flow_labels, - rename_current_flow_run_dataset_table, +from pipelines.datasets.br_bcb_taxa_selic.schedules import ( + schedule_every_weekday_taxa_selic, ) - from pipelines.datasets.br_bcb_taxa_selic.tasks import ( get_data_taxa_selic, treat_data_taxa_selic, ) -from pipelines.datasets.br_bcb_taxa_selic.schedules import ( - schedule_every_weekday_taxa_selic, +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata +from pipelines.utils.tasks import ( + create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, ) - with Flow( name="br_bcb_taxa_selic.taxa_selic", code_owners=[ diff --git a/pipelines/datasets/br_bcb_taxa_selic/schedules.py b/pipelines/datasets/br_bcb_taxa_selic/schedules.py index 1ff892c7e..4be6bea9b 100644 --- a/pipelines/datasets/br_bcb_taxa_selic/schedules.py +++ b/pipelines/datasets/br_bcb_taxa_selic/schedules.py @@ -3,9 +3,11 @@ Schedules for br-bcb-taxa-selic """ -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock from datetime import datetime + +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock + from pipelines.constants import constants schedule_every_weekday_taxa_selic = Schedule( diff --git a/pipelines/datasets/br_bcb_taxa_selic/tasks.py b/pipelines/datasets/br_bcb_taxa_selic/tasks.py index fd59782aa..850aa8c59 100644 --- a/pipelines/datasets/br_bcb_taxa_selic/tasks.py +++ b/pipelines/datasets/br_bcb_taxa_selic/tasks.py @@ -3,6 +3,7 @@ Tasks for br-bcb-taxa-selic """ from prefect import task + from 
pipelines.datasets.br_bcb_taxa_selic.utils import ( get_selic_data, read_input_csv, diff --git a/pipelines/datasets/br_bcb_taxa_selic/utils.py b/pipelines/datasets/br_bcb_taxa_selic/utils.py index b5a7da0d9..67d7cd1de 100644 --- a/pipelines/datasets/br_bcb_taxa_selic/utils.py +++ b/pipelines/datasets/br_bcb_taxa_selic/utils.py @@ -3,22 +3,22 @@ General purpose functions for the br-bcb-taxa-selic project """ -from io import BytesIO import os +import time as tm +from io import BytesIO from urllib.request import urlopen from zipfile import ZipFile + import pandas as pd import requests -import time as tm -from pipelines.utils.utils import ( - log, + +from pipelines.datasets.br_bcb_taxa_selic.constants import ( + constants as taxa_selic_constants, ) from pipelines.utils.apply_architecture_to_dataframe.utils import ( apply_architecture_to_dataframe, ) -from pipelines.datasets.br_bcb_taxa_selic.constants import ( - constants as taxa_selic_constants, -) +from pipelines.utils.utils import log def create_url_selic(start_date: str, end_date: str, moeda="USD") -> str: diff --git a/pipelines/datasets/br_bd_indicadores/flows.py b/pipelines/datasets/br_bd_indicadores/flows.py index 4080de496..4a00115a6 100755 --- a/pipelines/datasets/br_bd_indicadores/flows.py +++ b/pipelines/datasets/br_bd_indicadores/flows.py @@ -14,21 +14,21 @@ from pipelines.datasets.br_bd_indicadores.schedules import ( every_day, every_week, - schedule_users, - schedule_receitas, + schedule_contabilidade, schedule_equipes, schedule_pessoas, - schedule_contabilidade, + schedule_receitas, + schedule_users, ) from pipelines.datasets.br_bd_indicadores.tasks import ( crawler_metricas, crawler_real_time, - has_new_tweets, - echo, - get_twitter_credentials, - get_ga_credentials, crawler_report_ga, + echo, get_data_from_sheet, + get_ga_credentials, + get_twitter_credentials, + has_new_tweets, save_data_to_csv, ) from pipelines.utils.constants import constants as utils_constants @@ -36,8 +36,8 @@ from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, update_metadata, ) diff --git a/pipelines/datasets/br_bd_indicadores/schedules.py b/pipelines/datasets/br_bd_indicadores/schedules.py index bfe928f5e..c90052904 100755 --- a/pipelines/datasets/br_bd_indicadores/schedules.py +++ b/pipelines/datasets/br_bd_indicadores/schedules.py @@ -3,9 +3,9 @@ Schedules for bd_tweet_data """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta -from prefect.schedules import Schedule, filters, adjustments +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import IntervalClock from pipelines.constants import constants diff --git a/pipelines/datasets/br_bd_indicadores/tasks.py b/pipelines/datasets/br_bd_indicadores/tasks.py index f9176e79d..936b8fe9a 100644 --- a/pipelines/datasets/br_bd_indicadores/tasks.py +++ b/pipelines/datasets/br_bd_indicadores/tasks.py @@ -7,7 +7,7 @@ from datetime import datetime, timedelta from functools import reduce from pathlib import Path -from typing import Tuple, Optional +from typing import Optional, Tuple import numpy as np import pandas as pd @@ -20,21 +20,17 @@ from pipelines.constants import constants from pipelines.datasets.br_bd_indicadores.utils import ( + GA4RealTimeReport, + connect_to_endpoint, + create_google_sheet_url, create_headers, create_url, 
- connect_to_endpoint, flatten, - GA4RealTimeReport, - parse_data, - initialize_analyticsreporting, get_report, - create_google_sheet_url, -) -from pipelines.utils.utils import ( - get_storage_blobs, - log, - get_credentials_from_secret, + initialize_analyticsreporting, + parse_data, ) +from pipelines.utils.utils import get_credentials_from_secret, get_storage_blobs, log # pylint: disable=C0103 diff --git a/pipelines/datasets/br_bd_indicadores/utils.py b/pipelines/datasets/br_bd_indicadores/utils.py index 8d389d6d6..98963d4c9 100644 --- a/pipelines/datasets/br_bd_indicadores/utils.py +++ b/pipelines/datasets/br_bd_indicadores/utils.py @@ -5,8 +5,7 @@ # pylint: disable=too-few-public-methods import collections import os -from typing import List -from typing import Tuple +from typing import List, Tuple import pandas as pd import requests @@ -14,8 +13,6 @@ from google.analytics.data_v1beta.types import ( Dimension, Metric, -) -from google.analytics.data_v1beta.types import ( RunRealtimeReportRequest, ) from googleapiclient.discovery import build diff --git a/pipelines/datasets/br_bd_metadados/flows.py b/pipelines/datasets/br_bd_metadados/flows.py index 066e2ee45..bbb2bfff9 100755 --- a/pipelines/datasets/br_bd_metadados/flows.py +++ b/pipelines/datasets/br_bd_metadados/flows.py @@ -12,30 +12,30 @@ from pipelines.constants import constants from pipelines.datasets.br_bd_metadados.schedules import ( - every_day_organizations, + every_day_columns, every_day_datasets, - every_day_resources, every_day_external_links, every_day_information_requests, + every_day_organizations, + every_day_resources, every_day_tables, - every_day_columns, ) from pipelines.datasets.br_bd_metadados.tasks import ( - crawler_organizations, + crawler_columns, crawler_datasets, - crawler_resources, crawler_external_links, crawler_information_requests, + crawler_organizations, + crawler_resources, crawler_tables, - crawler_columns, ) from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) with Flow( diff --git a/pipelines/datasets/br_bd_metadados/schedules.py b/pipelines/datasets/br_bd_metadados/schedules.py index 9d10182e6..1e6313d8f 100755 --- a/pipelines/datasets/br_bd_metadados/schedules.py +++ b/pipelines/datasets/br_bd_metadados/schedules.py @@ -3,7 +3,7 @@ Schedules for bd_tweet_data """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock diff --git a/pipelines/datasets/br_bd_metadados/tasks.py b/pipelines/datasets/br_bd_metadados/tasks.py index b9a366079..0f8373d42 100755 --- a/pipelines/datasets/br_bd_metadados/tasks.py +++ b/pipelines/datasets/br_bd_metadados/tasks.py @@ -13,12 +13,10 @@ from pipelines.constants import constants from pipelines.datasets.br_bd_metadados.utils import ( - get_temporal_coverage_list, check_missing_metadata, + get_temporal_coverage_list, ) -from pipelines.utils.utils import ( - log, -) +from pipelines.utils.utils import log # pylint: disable=C0103 diff --git a/pipelines/datasets/br_bd_metadados/utils.py b/pipelines/datasets/br_bd_metadados/utils.py index 407140ce1..f4848fa7e 100755 --- a/pipelines/datasets/br_bd_metadados/utils.py +++ 
b/pipelines/datasets/br_bd_metadados/utils.py @@ -4,6 +4,7 @@ """ # pylint: disable=too-few-public-methods,invalid-name from datetime import datetime + import pandas as pd diff --git a/pipelines/datasets/br_cgu_pessoal_executivo_federal/flows.py b/pipelines/datasets/br_cgu_pessoal_executivo_federal/flows.py index 936f4a8f1..74b7a410a 100644 --- a/pipelines/datasets/br_cgu_pessoal_executivo_federal/flows.py +++ b/pipelines/datasets/br_cgu_pessoal_executivo_federal/flows.py @@ -4,27 +4,27 @@ """ from datetime import timedelta +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.tasks import ( - create_table_and_upload_to_gcs, - update_metadata, - rename_current_flow_run_dataset_table, - get_current_flow_labels, -) from pipelines.constants import constants +from pipelines.datasets.br_cgu_pessoal_executivo_federal.schedules import ( + every_four_months, +) from pipelines.datasets.br_cgu_pessoal_executivo_federal.tasks import ( - crawl, clean_save_table, + crawl, ) +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow -from pipelines.datasets.br_cgu_pessoal_executivo_federal.schedules import ( - every_four_months, +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.tasks import ( + create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_metadata, ) ROOT = "/tmp/data" diff --git a/pipelines/datasets/br_cgu_pessoal_executivo_federal/schedules.py b/pipelines/datasets/br_cgu_pessoal_executivo_federal/schedules.py index c6283e891..b11fa8229 100644 --- a/pipelines/datasets/br_cgu_pessoal_executivo_federal/schedules.py +++ b/pipelines/datasets/br_cgu_pessoal_executivo_federal/schedules.py @@ -4,10 +4,10 @@ """ from datetime import datetime -from prefect.schedules import Schedule, filters, adjustments +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock -from pipelines.constants import constants +from pipelines.constants import constants every_four_months = Schedule( clocks=[ diff --git a/pipelines/datasets/br_cgu_pessoal_executivo_federal/tasks.py b/pipelines/datasets/br_cgu_pessoal_executivo_federal/tasks.py index ae2c01ca9..9033e652d 100644 --- a/pipelines/datasets/br_cgu_pessoal_executivo_federal/tasks.py +++ b/pipelines/datasets/br_cgu_pessoal_executivo_federal/tasks.py @@ -6,9 +6,9 @@ import re from io import BytesIO from typing import Tuple -import requests import pandas as pd +import requests from bs4 import BeautifulSoup from prefect import task diff --git a/pipelines/datasets/br_cvm_administradores_carteira/flows.py b/pipelines/datasets/br_cvm_administradores_carteira/flows.py index 87a402e7c..8413a1e49 100644 --- a/pipelines/datasets/br_cvm_administradores_carteira/flows.py +++ b/pipelines/datasets/br_cvm_administradores_carteira/flows.py @@ -12,27 +12,26 @@ from pipelines.constants import constants from pipelines.datasets.br_cvm_administradores_carteira.schedules import ( - schedule_responsavel, schedule_fisica, schedule_juridica, + schedule_responsavel, ) from pipelines.datasets.br_cvm_administradores_carteira.tasks import ( - 
crawl, - clean_table_responsavel, clean_table_pessoa_fisica, clean_table_pessoa_juridica, + clean_table_responsavel, + crawl, extract_last_date, ) -from pipelines.utils.metadata.tasks import update_django_metadata - from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, + get_current_flow_labels, get_temporal_coverage, rename_current_flow_run_dataset_table, - get_current_flow_labels, ) ROOT = "/tmp/data" diff --git a/pipelines/datasets/br_cvm_administradores_carteira/schedules.py b/pipelines/datasets/br_cvm_administradores_carteira/schedules.py index dea838d09..69c70bfec 100644 --- a/pipelines/datasets/br_cvm_administradores_carteira/schedules.py +++ b/pipelines/datasets/br_cvm_administradores_carteira/schedules.py @@ -2,7 +2,7 @@ """ Schedules for br_cvm_administradores_carteira """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta from prefect.schedules import Schedule, filters from prefect.schedules.clocks import IntervalClock diff --git a/pipelines/datasets/br_cvm_administradores_carteira/tasks.py b/pipelines/datasets/br_cvm_administradores_carteira/tasks.py index c202cc2aa..62716e2af 100644 --- a/pipelines/datasets/br_cvm_administradores_carteira/tasks.py +++ b/pipelines/datasets/br_cvm_administradores_carteira/tasks.py @@ -7,12 +7,13 @@ import os import shutil +import basedosdados as bd import pandas as pd import requests from pandas.api.types import is_string_dtype from prefect import task from unidecode import unidecode -import basedosdados as bd + from pipelines.utils.utils import log diff --git a/pipelines/datasets/br_cvm_fi/flows.py b/pipelines/datasets/br_cvm_fi/flows.py index 4acaca4ea..d73d36b8c 100644 --- a/pipelines/datasets/br_cvm_fi/flows.py +++ b/pipelines/datasets/br_cvm_fi/flows.py @@ -4,48 +4,47 @@ """ +from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from datetime import timedelta +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + +from pipelines.constants import constants +from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants +from pipelines.datasets.br_cvm_fi.schedules import ( + every_day_balancete, + every_day_carteiras, + every_day_extratos, + every_day_informacao_cadastral, + every_day_informe, + every_day_perfil, +) from pipelines.datasets.br_cvm_fi.tasks import ( - extract_links_and_dates, check_for_updates, - is_empty, - download_unzip_csv, + check_for_updates_ext, clean_data_and_make_partitions, + clean_data_make_partitions_balancete, + clean_data_make_partitions_cad, clean_data_make_partitions_cda, - download_csv_cvm, clean_data_make_partitions_ext, - check_for_updates_ext, clean_data_make_partitions_perfil, - clean_data_make_partitions_cad, - clean_data_make_partitions_balancete, + download_csv_cvm, + download_unzip_csv, + extract_links_and_dates, get_today_date, + is_empty, ) -from pipelines.datasets.br_cvm_fi.schedules import ( - every_day_informe, - every_day_carteiras, - every_day_balancete, - every_day_extratos, - every_day_informacao_cadastral, - every_day_perfil, -) -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from pipelines.utils.constants import constants as 
utils_constants from pipelines.utils.decorators import Flow -from prefect import Parameter, case from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants -from pipelines.constants import constants -from pipelines.utils.tasks import ( - log_task, - # update_django_metadata, -) from pipelines.utils.metadata.tasks import update_django_metadata -from pipelines.utils.tasks import ( +from pipelines.utils.tasks import ( # update_django_metadata, create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + log_task, + rename_current_flow_run_dataset_table, ) # rom pipelines.datasets.br_cvm_fi.schedules import every_day_cvm diff --git a/pipelines/datasets/br_cvm_fi/schedules.py b/pipelines/datasets/br_cvm_fi/schedules.py index e6df000f7..a6e9a9b37 100644 --- a/pipelines/datasets/br_cvm_fi/schedules.py +++ b/pipelines/datasets/br_cvm_fi/schedules.py @@ -4,13 +4,13 @@ """ from datetime import datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock +from prefect.schedules.clocks import CronClock, IntervalClock + from pipelines.constants import constants -from prefect.schedules.clocks import CronClock from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants - every_day_informe = Schedule( clocks=[ CronClock( diff --git a/pipelines/datasets/br_cvm_fi/tasks.py b/pipelines/datasets/br_cvm_fi/tasks.py index 491c17f17..3344a418e 100644 --- a/pipelines/datasets/br_cvm_fi/tasks.py +++ b/pipelines/datasets/br_cvm_fi/tasks.py @@ -3,33 +3,32 @@ Tasks for br_cvm_fi """ -from prefect import task -import pandas as pd +import glob import os +import re +import zipfile from datetime import datetime + +import pandas as pd import requests -from tqdm import tqdm -import zipfile +import rpy2.robjects as ro +import rpy2.robjects.packages as rpackages from bs4 import BeautifulSoup -import re -import glob +from prefect import task +from rpy2.robjects import pandas2ri from rpy2.robjects.packages import importr -import rpy2.robjects.packages as rpackages -import rpy2.robjects as ro from rpy2.robjects.vectors import StrVector -from rpy2.robjects import pandas2ri +from tqdm import tqdm + +from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants from pipelines.datasets.br_cvm_fi.utils import ( - sheet_to_df, - rename_columns, check_and_create_column, limpar_string, obter_anos_meses, + rename_columns, + sheet_to_df, ) -from pipelines.utils.utils import ( - log, - to_partitions, -) -from pipelines.datasets.br_cvm_fi.constants import constants as cvm_constants +from pipelines.utils.utils import log, to_partitions @task diff --git a/pipelines/datasets/br_cvm_fi/utils.py b/pipelines/datasets/br_cvm_fi/utils.py index 48953d2c1..a7c12c47b 100644 --- a/pipelines/datasets/br_cvm_fi/utils.py +++ b/pipelines/datasets/br_cvm_fi/utils.py @@ -2,11 +2,12 @@ """ General purpose functions for the br_cvm_fi project """ -from io import StringIO -import requests -import pandas as pd import os import re +from io import StringIO + +import pandas as pd +import requests from unidecode import unidecode diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py index 20ef02fb8..d11400b10 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py +++ 
b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/flows.py @@ -5,27 +5,27 @@ # pylint: disable=C0103, E1123, invalid-name from datetime import timedelta +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.metadata.tasks import update_django_metadata -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants +from pipelines.datasets.br_cvm_oferta_publica_distribuicao.schedules import schedule_dia from pipelines.datasets.br_cvm_oferta_publica_distribuicao.tasks import ( - crawl, clean_table_oferta_distribuicao, + crawl, get_today_date, ) +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) -from pipelines.datasets.br_cvm_oferta_publica_distribuicao.schedules import schedule_dia ROOT = "/tmp/data" URL = "http://dados.cvm.gov.br/dados/OFERTA/DISTRIB/DADOS/oferta_distribuicao.csv" diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/schedules.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/schedules.py index 776afaf64..fa1f719cf 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/schedules.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/schedules.py @@ -3,9 +3,11 @@ Schedules for br_cvm_oferta_publica_distribuicao """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule, filters from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants schedule_dia = Schedule( diff --git a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py index 74792b2c0..4e3d5902b 100644 --- a/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py +++ b/pipelines/datasets/br_cvm_oferta_publica_distribuicao/tasks.py @@ -3,13 +3,14 @@ Tasks for br_cvm_oferta_publica_distribuicao """ import os +from datetime import datetime +import basedosdados as bd import pandas as pd from pandas.api.types import is_string_dtype from prefect import task from unidecode import unidecode -import basedosdados as bd -from datetime import datetime + from pipelines.utils.utils import log diff --git a/pipelines/datasets/br_fgv_igp/flows.py b/pipelines/datasets/br_fgv_igp/flows.py index 6b241a01a..94a9536bf 100644 --- a/pipelines/datasets/br_fgv_igp/flows.py +++ b/pipelines/datasets/br_fgv_igp/flows.py @@ -14,26 +14,24 @@ from pipelines.constants import constants from pipelines.datasets.br_fgv_igp.schedules import ( - igp_di_mes, + igp_10_mes, igp_di_ano, - igp_m_mes, + igp_di_mes, igp_m_ano, + igp_m_mes, igp_og_ano, igp_og_mes, - igp_10_mes, ) -from pipelines.datasets.br_fgv_igp.tasks import crawler_fgv, clean_fgv_df +from pipelines.datasets.br_fgv_igp.tasks import clean_fgv_df, crawler_fgv from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import 
Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - update_metadata, + get_current_flow_labels, get_temporal_coverage, -) -from pipelines.utils.tasks import ( rename_current_flow_run_dataset_table, - get_current_flow_labels, + update_metadata, ) ROOT = Path("tmp/data") diff --git a/pipelines/datasets/br_ibge_inpc/flows.py b/pipelines/datasets/br_ibge_inpc/flows.py index 871e4376b..216acd39b 100644 --- a/pipelines/datasets/br_ibge_inpc/flows.py +++ b/pipelines/datasets/br_ibge_inpc/flows.py @@ -11,19 +11,18 @@ from pipelines.constants import constants from pipelines.datasets.br_ibge_inpc.schedules import ( + br_ibge_inpc_mes_brasil_every_month, br_ibge_inpc_mes_categoria_brasil_every_month, - br_ibge_inpc_mes_categoria_rm_every_month, br_ibge_inpc_mes_categoria_municipio_every_month, - br_ibge_inpc_mes_brasil_every_month, + br_ibge_inpc_mes_categoria_rm_every_month, ) from pipelines.utils.crawler_ibge_inflacao.flows import ( flow_ibge_inflacao_mes_brasil, - flow_ibge_inflacao_mes_rm, - flow_ibge_inflacao_mes_municipio, flow_ibge_inflacao_mes_geral, + flow_ibge_inflacao_mes_municipio, + flow_ibge_inflacao_mes_rm, ) - br_ibge_inpc_mes_categoria_brasil = deepcopy(flow_ibge_inflacao_mes_brasil) br_ibge_inpc_mes_categoria_brasil.name = "br_ibge_inpc.mes_categoria_brasil" br_ibge_inpc_mes_categoria_brasil.code_owners = ["lucas_cr"] diff --git a/pipelines/datasets/br_ibge_ipca/flows.py b/pipelines/datasets/br_ibge_ipca/flows.py index 339b3e813..053710525 100644 --- a/pipelines/datasets/br_ibge_ipca/flows.py +++ b/pipelines/datasets/br_ibge_ipca/flows.py @@ -11,19 +11,18 @@ from pipelines.constants import constants from pipelines.datasets.br_ibge_ipca.schedules import ( + br_ibge_ipca_mes_brasil_every_month, br_ibge_ipca_mes_categoria_brasil_every_month, - br_ibge_ipca_mes_categoria_rm_every_month, br_ibge_ipca_mes_categoria_municipio_every_month, - br_ibge_ipca_mes_brasil_every_month, + br_ibge_ipca_mes_categoria_rm_every_month, ) from pipelines.utils.crawler_ibge_inflacao.flows import ( flow_ibge_inflacao_mes_brasil, - flow_ibge_inflacao_mes_rm, - flow_ibge_inflacao_mes_municipio, flow_ibge_inflacao_mes_geral, + flow_ibge_inflacao_mes_municipio, + flow_ibge_inflacao_mes_rm, ) - br_ibge_ipca_mes_categoria_brasil = deepcopy(flow_ibge_inflacao_mes_brasil) br_ibge_ipca_mes_categoria_brasil.name = "br_ibge_ipca.mes_categoria_brasil" br_ibge_ipca_mes_categoria_brasil.code_owners = ["lucas_cr"] diff --git a/pipelines/datasets/br_ibge_ipca15/flows.py b/pipelines/datasets/br_ibge_ipca15/flows.py index 9173be755..77fe8c539 100644 --- a/pipelines/datasets/br_ibge_ipca15/flows.py +++ b/pipelines/datasets/br_ibge_ipca15/flows.py @@ -11,19 +11,18 @@ from pipelines.constants import constants from pipelines.datasets.br_ibge_ipca15.schedules import ( + br_ibge_ipca15_mes_brasil_every_month, br_ibge_ipca15_mes_categoria_brasil_every_month, - br_ibge_ipca15_mes_categoria_rm_every_month, br_ibge_ipca15_mes_categoria_municipio_every_month, - br_ibge_ipca15_mes_brasil_every_month, + br_ibge_ipca15_mes_categoria_rm_every_month, ) from pipelines.utils.crawler_ibge_inflacao.flows import ( flow_ibge_inflacao_mes_brasil, - flow_ibge_inflacao_mes_rm, - flow_ibge_inflacao_mes_municipio, flow_ibge_inflacao_mes_geral, + flow_ibge_inflacao_mes_municipio, + flow_ibge_inflacao_mes_rm, ) - br_ibge_ipca15_mes_categoria_brasil = deepcopy(flow_ibge_inflacao_mes_brasil) br_ibge_ipca15_mes_categoria_brasil.name = 
"br_ibge_ipca15.mes_categoria_brasil" br_ibge_ipca15_mes_categoria_brasil.code_owners = ["lucas_cr"] diff --git a/pipelines/datasets/br_ibge_pnadc/flows.py b/pipelines/datasets/br_ibge_pnadc/flows.py index 253a733f5..379579203 100644 --- a/pipelines/datasets/br_ibge_pnadc/flows.py +++ b/pipelines/datasets/br_ibge_pnadc/flows.py @@ -5,29 +5,29 @@ from datetime import timedelta +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants +from pipelines.datasets.br_ibge_pnadc.schedules import every_quarter from pipelines.datasets.br_ibge_pnadc.tasks import ( - get_url_from_template, - download_txt, build_parquet_files, - save_partitions, + download_txt, + get_url_from_template, get_year_quarter, + save_partitions, ) +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.flows import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) -from pipelines.utils.metadata.flows import update_django_metadata -from pipelines.datasets.br_ibge_pnadc.schedules import every_quarter # pylint: disable=C0103 with Flow(name="br_ibge_pnadc.microdados", code_owners=["lucas_cr"]) as br_pnadc: diff --git a/pipelines/datasets/br_ibge_pnadc/tasks.py b/pipelines/datasets/br_ibge_pnadc/tasks.py index 4c9804a7e..390c9d92c 100644 --- a/pipelines/datasets/br_ibge_pnadc/tasks.py +++ b/pipelines/datasets/br_ibge_pnadc/tasks.py @@ -2,19 +2,20 @@ """ Tasks for br_ibge_pnadc """ +import os + # pylint: disable=invalid-name,unnecessary-dunder-call import zipfile -import os from glob import glob -import requests -from tqdm import tqdm -import pandas as pd import numpy as np +import pandas as pd +import requests from prefect import task +from tqdm import tqdm -from pipelines.utils.utils import log from pipelines.datasets.br_ibge_pnadc.constants import constants as pnad_constants +from pipelines.utils.utils import log @task diff --git a/pipelines/datasets/br_inmet_bdmep/flows.py b/pipelines/datasets/br_inmet_bdmep/flows.py index 9277c658b..8f21d36e8 100644 --- a/pipelines/datasets/br_inmet_bdmep/flows.py +++ b/pipelines/datasets/br_inmet_bdmep/flows.py @@ -3,27 +3,25 @@ Flows for br_inmet_bdmep """ -from prefect.run_configs import KubernetesRun -from prefect.storage import GCS from datetime import timedelta + from prefect import Parameter, case +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants -from pipelines.datasets.br_inmet_bdmep.tasks import ( - get_base_inmet, -) +from pipelines.datasets.br_inmet_bdmep.schedules import every_month_inmet +from pipelines.datasets.br_inmet_bdmep.tasks import get_base_inmet +from pipelines.utils.constants import constants as 
utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) -from pipelines.datasets.br_inmet_bdmep.schedules import every_month_inmet - # from pipelines.datasets.br_ibge_pnadc.schedules import every_quarter # pylint: disable=C0103 diff --git a/pipelines/datasets/br_inmet_bdmep/schedules.py b/pipelines/datasets/br_inmet_bdmep/schedules.py index d89df7466..ac5a6c842 100644 --- a/pipelines/datasets/br_inmet_bdmep/schedules.py +++ b/pipelines/datasets/br_inmet_bdmep/schedules.py @@ -71,11 +71,11 @@ from datetime import datetime + from prefect.schedules import Schedule -from prefect.schedules.clocks import IntervalClock -from pipelines.constants import constants -from prefect.schedules.clocks import CronClock +from prefect.schedules.clocks import CronClock, IntervalClock +from pipelines.constants import constants every_month_inmet = Schedule( clocks=[ diff --git a/pipelines/datasets/br_inmet_bdmep/tasks.py b/pipelines/datasets/br_inmet_bdmep/tasks.py index bae6ad11a..d13785593 100644 --- a/pipelines/datasets/br_inmet_bdmep/tasks.py +++ b/pipelines/datasets/br_inmet_bdmep/tasks.py @@ -2,22 +2,21 @@ """ Tasks for br_inmet_bdmep """ -from pipelines.utils.utils import ( - log, -) -from pipelines.datasets.br_inmet_bdmep.utils import ( - get_clima_info, - download_inmet, - year_list, -) -from pipelines.constants import constants - -import pandas as pd +import glob import os + import numpy as np -import glob +import pandas as pd from prefect import task + +from pipelines.constants import constants from pipelines.datasets.br_inmet_bdmep.constants import constants as inmet_constants +from pipelines.datasets.br_inmet_bdmep.utils import ( + download_inmet, + get_clima_info, + year_list, +) +from pipelines.utils.utils import log # pylint: disable=C0103 diff --git a/pipelines/datasets/br_inmet_bdmep/utils.py b/pipelines/datasets/br_inmet_bdmep/utils.py index 3cd2b8786..41fc11519 100644 --- a/pipelines/datasets/br_inmet_bdmep/utils.py +++ b/pipelines/datasets/br_inmet_bdmep/utils.py @@ -5,14 +5,15 @@ # pylint: disable=too-few-public-methods,invalid-name -import pandas as pd +import os +import re import tempfile import urllib.request import zipfile -import os -import numpy as np -import re from datetime import datetime, time + +import numpy as np +import pandas as pd from unidecode import unidecode diff --git a/pipelines/datasets/br_jota/flows.py b/pipelines/datasets/br_jota/flows.py index 69ccabac2..31bf31dc6 100644 --- a/pipelines/datasets/br_jota/flows.py +++ b/pipelines/datasets/br_jota/flows.py @@ -10,12 +10,11 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import get_current_flow_labels - with Flow( name="br_jota.eleicao_perfil_candidato_2022", code_owners=["lauris"] ) as eleicao_perfil_candidato_2022: diff --git a/pipelines/datasets/br_jota/schedules.py b/pipelines/datasets/br_jota/schedules.py index 1b9145550..a56e174ff 100644 --- 
a/pipelines/datasets/br_jota/schedules.py +++ b/pipelines/datasets/br_jota/schedules.py @@ -3,9 +3,11 @@ Schedules for br_jota """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule, filters from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants schedule_candidatos = Schedule( diff --git a/pipelines/datasets/br_me_caged/flows.py b/pipelines/datasets/br_me_caged/flows.py index d9758b719..c1320e097 100644 --- a/pipelines/datasets/br_me_caged/flows.py +++ b/pipelines/datasets/br_me_caged/flows.py @@ -2,16 +2,17 @@ """ Flows for br_me_novo_caged """ -# pylint: disable=invalid-name -from prefect.storage import GCS from prefect import Parameter from prefect.run_configs import KubernetesRun -from pipelines.utils.decorators import Flow +# pylint: disable=invalid-name +from prefect.storage import GCS + from pipelines.constants import constants +from pipelines.datasets.br_me_caged.schedules import every_month from pipelines.datasets.br_me_caged.tasks import build_partitions, get_caged_data +from pipelines.utils.decorators import Flow from pipelines.utils.tasks import create_table_and_upload_to_gcs -from pipelines.datasets.br_me_caged.schedules import every_month with Flow("br_me_caged.microdados_mov", code_owners=["lucas_cr"]) as cagedmov: dataset_id = Parameter("dataset_id", default="br_me_caged", required=True) diff --git a/pipelines/datasets/br_me_caged/schedules.py b/pipelines/datasets/br_me_caged/schedules.py index 7bc9fa09a..16b0d63e6 100644 --- a/pipelines/datasets/br_me_caged/schedules.py +++ b/pipelines/datasets/br_me_caged/schedules.py @@ -2,9 +2,11 @@ """ Schedules for br_me_novo_caged """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_month = Schedule( diff --git a/pipelines/datasets/br_me_caged/tasks.py b/pipelines/datasets/br_me_caged/tasks.py index f9d1318ac..037baf4cb 100644 --- a/pipelines/datasets/br_me_caged/tasks.py +++ b/pipelines/datasets/br_me_caged/tasks.py @@ -2,16 +2,17 @@ """ Tasks for br_me_novo_caged """ +import os + # pylint: disable=invalid-name import re -import os -from glob import glob from datetime import timedelta +from glob import glob -from prefect import task import pandas as pd -from unidecode import unidecode +from prefect import task from tqdm import tqdm +from unidecode import unidecode from pipelines.constants import constants diff --git a/pipelines/datasets/br_me_cnpj/flows.py b/pipelines/datasets/br_me_cnpj/flows.py index 5ed223cfe..030ed018f 100644 --- a/pipelines/datasets/br_me_cnpj/flows.py +++ b/pipelines/datasets/br_me_cnpj/flows.py @@ -3,41 +3,38 @@ Flows for br_me_cnpj """ from datetime import timedelta -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants + from prefect import Parameter, case -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, -) -from pipelines.utils.constants import constants as utils_constants -from pipelines.datasets.br_me_cnpj.constants import ( - constants as constants_cnpj, -) from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants -from pipelines.datasets.br_me_cnpj.tasks import ( - calculate_defasagem, - format_date_to_string, - 
check_for_updates, - main, -) +from pipelines.datasets.br_me_cnpj.constants import constants as constants_cnpj from pipelines.datasets.br_me_cnpj.schedules import ( every_day_empresas, - every_day_socios, every_day_estabelecimentos, every_day_simples, + every_day_socios, +) +from pipelines.datasets.br_me_cnpj.tasks import ( + calculate_defasagem, + check_for_updates, + format_date_to_string, + main, ) +from pipelines.datasets.br_me_cnpj.utils import data_url +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, - log_task, log, + log_task, + rename_current_flow_run_dataset_table, ) -from pipelines.utils.metadata.tasks import update_django_metadata -from pipelines.datasets.br_me_cnpj.utils import data_url with Flow( name="br_me_cnpj.empresas", diff --git a/pipelines/datasets/br_me_cnpj/schedules.py b/pipelines/datasets/br_me_cnpj/schedules.py index 610e61d5d..a54355c5b 100644 --- a/pipelines/datasets/br_me_cnpj/schedules.py +++ b/pipelines/datasets/br_me_cnpj/schedules.py @@ -3,10 +3,13 @@ Schedules for br_me_cnpj """ -############################################################################### -from prefect.schedules.clocks import CronClock from datetime import datetime + from prefect.schedules import Schedule + +############################################################################### +from prefect.schedules.clocks import CronClock + from pipelines.constants import constants every_day_empresas = Schedule( diff --git a/pipelines/datasets/br_me_cnpj/tasks.py b/pipelines/datasets/br_me_cnpj/tasks.py index 70c1ac83e..ae10bf18d 100644 --- a/pipelines/datasets/br_me_cnpj/tasks.py +++ b/pipelines/datasets/br_me_cnpj/tasks.py @@ -2,25 +2,25 @@ """ Tasks for br_me_cnpj """ -from pipelines.utils.utils import extract_last_date, log +import os +from datetime import datetime + +import pandas as pd +import requests from prefect import task -from pipelines.datasets.br_me_cnpj.constants import ( - constants as constants_cnpj, -) +from tqdm import tqdm + +from pipelines.datasets.br_me_cnpj.constants import constants as constants_cnpj from pipelines.datasets.br_me_cnpj.utils import ( data_url, destino_output, download_unzip_csv, - process_csv_estabelecimentos, - process_csv_socios, process_csv_empresas, + process_csv_estabelecimentos, process_csv_simples, + process_csv_socios, ) -import os -import requests -import pandas as pd -from datetime import datetime -from tqdm import tqdm +from pipelines.utils.utils import extract_last_date, log ufs = constants_cnpj.UFS.value url = constants_cnpj.URL.value diff --git a/pipelines/datasets/br_me_cnpj/utils.py b/pipelines/datasets/br_me_cnpj/utils.py index 7d201b4c3..b7804602c 100644 --- a/pipelines/datasets/br_me_cnpj/utils.py +++ b/pipelines/datasets/br_me_cnpj/utils.py @@ -2,19 +2,19 @@ """ General purpose functions for the br_me_cnpj project """ -from pipelines.datasets.br_me_cnpj.constants import ( - constants as constants_cnpj, -) -from pipelines.utils.utils import log -import requests -import pandas as pd -from bs4 import BeautifulSoup -from datetime import datetime import os import zipfile -from tqdm import tqdm -import pyarrow.parquet as pq +from datetime import datetime + +import pandas as pd import pyarrow 
as pa +import pyarrow.parquet as pq +import requests +from bs4 import BeautifulSoup +from tqdm import tqdm + +from pipelines.datasets.br_me_cnpj.constants import constants as constants_cnpj +from pipelines.utils.utils import log ufs = constants_cnpj.UFS.value headers = constants_cnpj.HEADERS.value diff --git a/pipelines/datasets/br_me_comex_stat/flows.py b/pipelines/datasets/br_me_comex_stat/flows.py index 7edf8b613..376310173 100644 --- a/pipelines/datasets/br_me_comex_stat/flows.py +++ b/pipelines/datasets/br_me_comex_stat/flows.py @@ -11,29 +11,27 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants +from pipelines.datasets.br_me_comex_stat.constants import constants as comex_constants +from pipelines.datasets.br_me_comex_stat.schedules import ( + schedule_municipio_exportacao, + schedule_municipio_importacao, + schedule_ncm_exportacao, + schedule_ncm_importacao, +) from pipelines.datasets.br_me_comex_stat.tasks import ( - download_br_me_comex_stat, clean_br_me_comex_stat, + download_br_me_comex_stat, ) -from pipelines.datasets.br_me_comex_stat.constants import constants as comex_constants from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( - update_django_metadata, - rename_current_flow_run_dataset_table, - get_current_flow_labels, create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_django_metadata, ) -from pipelines.datasets.br_me_comex_stat.schedules import ( - schedule_municipio_exportacao, - schedule_municipio_importacao, - schedule_ncm_exportacao, - schedule_ncm_importacao, -) - - with Flow( name="br_me_comex_stat.municipio_exportacao", code_owners=["Gabriel Pisa"] ) as br_comex_municipio_exportacao: diff --git a/pipelines/datasets/br_me_comex_stat/schedules.py b/pipelines/datasets/br_me_comex_stat/schedules.py index 5066f20c4..0f28294fa 100644 --- a/pipelines/datasets/br_me_comex_stat/schedules.py +++ b/pipelines/datasets/br_me_comex_stat/schedules.py @@ -5,7 +5,8 @@ """ from datetime import datetime -from prefect.schedules import Schedule, filters, adjustments + +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock from pipelines.constants import constants diff --git a/pipelines/datasets/br_me_comex_stat/tasks.py b/pipelines/datasets/br_me_comex_stat/tasks.py index e0bb923e5..1e46e5652 100644 --- a/pipelines/datasets/br_me_comex_stat/tasks.py +++ b/pipelines/datasets/br_me_comex_stat/tasks.py @@ -3,25 +3,21 @@ """ Tasks for br_me_comex_stat """ +import os +import time as tm + # pylint: disable=invalid-name,too-many-nested-blocks from zipfile import ZipFile -import time as tm -import pandas as pd + +import basedosdados as bd import numpy as np +import pandas as pd from prefect import task -import basedosdados as bd -import os - - -from pipelines.datasets.br_me_comex_stat.utils import create_paths, download_data -from pipelines.datasets.br_me_comex_stat.constants import constants as comex_constants from pipelines.constants import constants - -from pipelines.utils.utils import ( - log, - to_partitions, -) +from pipelines.datasets.br_me_comex_stat.constants import constants as comex_constants +from pipelines.datasets.br_me_comex_stat.utils import create_paths, download_data +from pipelines.utils.utils import log, to_partitions @task 
diff --git a/pipelines/datasets/br_me_comex_stat/utils.py b/pipelines/datasets/br_me_comex_stat/utils.py index 6fc4b1f03..4d76a92a2 100644 --- a/pipelines/datasets/br_me_comex_stat/utils.py +++ b/pipelines/datasets/br_me_comex_stat/utils.py @@ -2,12 +2,12 @@ """ Utils for the Brazilian Comex Stat pipeline. """ # pylint: disable=invalid-name import os -import wget import time as tm + +import wget from tqdm import tqdm -from pipelines.utils.utils import ( - log, -) + +from pipelines.utils.utils import log def create_paths( diff --git a/pipelines/datasets/br_mercadolivre_ofertas/decorators.py b/pipelines/datasets/br_mercadolivre_ofertas/decorators.py index 4949fb79c..a52addaaa 100644 --- a/pipelines/datasets/br_mercadolivre_ofertas/decorators.py +++ b/pipelines/datasets/br_mercadolivre_ofertas/decorators.py @@ -3,8 +3,9 @@ Custom decorators for pipelines. """ import asyncio -from bs4 import BeautifulSoup + import requests +from bs4 import BeautifulSoup def retry(content_function): diff --git a/pipelines/datasets/br_mercadolivre_ofertas/flows.py b/pipelines/datasets/br_mercadolivre_ofertas/flows.py index b354709c9..3e30f5929 100644 --- a/pipelines/datasets/br_mercadolivre_ofertas/flows.py +++ b/pipelines/datasets/br_mercadolivre_ofertas/flows.py @@ -2,6 +2,8 @@ """ Flows for mercadolivre_ofertas """ +import datetime + # pylint: disable=invalid-name from datetime import timedelta @@ -11,26 +13,24 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants - -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.tasks import ( - rename_current_flow_run_dataset_table, - get_current_flow_labels, - create_table_and_upload_to_gcs, -) -from pipelines.utils.metadata.tasks import update_django_metadata +from pipelines.datasets.br_mercadolivre_ofertas.schedules import every_day_item from pipelines.datasets.br_mercadolivre_ofertas.tasks import ( - crawler_mercadolivre_item, - crawler_mercadolivre_seller, clean_item, clean_seller, + crawler_mercadolivre_item, + crawler_mercadolivre_seller, get_today_sellers, is_empty_list, ) -from pipelines.datasets.br_mercadolivre_ofertas.schedules import every_day_item -import datetime +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata +from pipelines.utils.tasks import ( + create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, +) with Flow( name="br_mercadolivre_ofertas.item", code_owners=["Gabs"] diff --git a/pipelines/datasets/br_mercadolivre_ofertas/schedules.py b/pipelines/datasets/br_mercadolivre_ofertas/schedules.py index 5c4087726..1d15d65b1 100644 --- a/pipelines/datasets/br_mercadolivre_ofertas/schedules.py +++ b/pipelines/datasets/br_mercadolivre_ofertas/schedules.py @@ -10,7 +10,6 @@ from pipelines.constants import constants - every_day_item = Schedule( clocks=[ IntervalClock( diff --git a/pipelines/datasets/br_mercadolivre_ofertas/tasks.py b/pipelines/datasets/br_mercadolivre_ofertas/tasks.py index 83e6a9af8..58ad3753b 100644 --- a/pipelines/datasets/br_mercadolivre_ofertas/tasks.py +++ b/pipelines/datasets/br_mercadolivre_ofertas/tasks.py @@ -4,23 +4,23 @@ import asyncio -import 
time import os +import time from typing import List, Tuple -from prefect import task import pandas as pd +from prefect import task -from pipelines.utils.tasks import log from pipelines.datasets.br_mercadolivre_ofertas.constants import ( constants as const_mercadolivre, ) from pipelines.datasets.br_mercadolivre_ofertas.utils import ( + clean_experience, + get_id, main_item, main_seller, - get_id, - clean_experience, ) +from pipelines.utils.tasks import log new_cols_item = const_mercadolivre.NEW_ORDER_COLS.value new_order_clean = const_mercadolivre.NEW_ORDER_CLEAN.value diff --git a/pipelines/datasets/br_mercadolivre_ofertas/utils.py b/pipelines/datasets/br_mercadolivre_ofertas/utils.py index 49a9d00cc..dc4355709 100644 --- a/pipelines/datasets/br_mercadolivre_ofertas/utils.py +++ b/pipelines/datasets/br_mercadolivre_ofertas/utils.py @@ -3,17 +3,19 @@ import asyncio import hashlib +import json import re from datetime import datetime -from pipelines.utils.tasks import log + +import Levenshtein +import pandas as pd import requests -from tqdm import tqdm from bs4 import BeautifulSoup from fake_useragent import UserAgent -import Levenshtein -import pandas as pd +from tqdm import tqdm + from pipelines.datasets.br_mercadolivre_ofertas.decorators import retry -import json +from pipelines.utils.tasks import log ua = UserAgent() diff --git a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/flows.py b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/flows.py index 23acfb63f..aa4e41429 100644 --- a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/flows.py +++ b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/flows.py @@ -3,34 +3,34 @@ Flows for br_mg_belohorizonte_smfa_iptu """ from datetime import timedelta + from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.metadata.tasks import update_django_metadata + from pipelines.constants import constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.constants import ( constants as constants_iptu, ) +from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.schedules import every_weeks_iptu from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.tasks import ( + check_for_updates, download_and_transform, - make_partitions, get_max_data, - check_for_updates, + make_partitions, ) - +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, log_task, + rename_current_flow_run_dataset_table, ) -from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.schedules import every_weeks_iptu - with Flow( name="br_mg_belohorizonte_smfa_iptu.iptu", code_owners=["trick"] ) as br_mg_belohorizonte_smfa_iptu_iptu: diff --git a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/schedules.py b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/schedules.py index 2fce25f52..34b841bd7 100644 --- a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/schedules.py +++ 
b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/schedules.py @@ -3,9 +3,11 @@ Schedules for br_mg_belohorizonte_smfa_iptu """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_weeks_iptu = Schedule( diff --git a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/tasks.py b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/tasks.py index d5180c5fe..909987818 100644 --- a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/tasks.py +++ b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/tasks.py @@ -2,22 +2,24 @@ """ Tasks for br_mg_belohorizonte_smfa_iptu """ -from prefect import task +import os + import requests from bs4 import BeautifulSoup +from prefect import task + from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.constants import constants -from pipelines.utils.utils import extract_last_date, log, to_partitions -import os from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.utils import ( - scrapping_download_csv, + changing_coordinates, concat_csv, - rename_columns, fix_variables, new_column_endereco, new_columns_ano_mes, + rename_columns, reorder_and_fix_nan, - changing_coordinates, + scrapping_download_csv, ) +from pipelines.utils.utils import extract_last_date, log, to_partitions @task # noqa diff --git a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/utils.py b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/utils.py index 7437bc12b..c915c6310 100644 --- a/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/utils.py +++ b/pipelines/datasets/br_mg_belohorizonte_smfa_iptu/utils.py @@ -3,15 +3,17 @@ General purpose functions for the br_mg_belohorizonte_smfa_iptu project """ -import pandas as pd import os + +import geopandas as gpd +import numpy as np +import pandas as pd import requests from bs4 import BeautifulSoup -import numpy as np -import geopandas as gpd from shapely import wkt -from pipelines.utils.tasks import log + from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.constants import constants +from pipelines.utils.tasks import log def scrapping_download_csv(input_path: str): diff --git a/pipelines/datasets/br_mp_pep_cargos_funcoes/flows.py b/pipelines/datasets/br_mp_pep_cargos_funcoes/flows.py index bfe1ba91d..7851451e6 100644 --- a/pipelines/datasets/br_mp_pep_cargos_funcoes/flows.py +++ b/pipelines/datasets/br_mp_pep_cargos_funcoes/flows.py @@ -10,27 +10,25 @@ from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run - from pipelines.constants import constants -from pipelines.utils.tasks import ( - rename_current_flow_run_dataset_table, - create_table_and_upload_to_gcs, - get_current_flow_labels, -) -from pipelines.utils.metadata.tasks import update_django_metadata -from pipelines.utils.utils import log_task -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow - from pipelines.datasets.br_mp_pep_cargos_funcoes.schedules import every_month from pipelines.datasets.br_mp_pep_cargos_funcoes.tasks import ( - setup_web_driver, - scraper, clean_data, - make_partitions, is_up_to_date, + make_partitions, + scraper, + setup_web_driver, +) +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as 
dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata +from pipelines.utils.tasks import ( + create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, ) +from pipelines.utils.utils import log_task with Flow( name="br_mp_pep.cargos_funcoes", diff --git a/pipelines/datasets/br_mp_pep_cargos_funcoes/schedules.py b/pipelines/datasets/br_mp_pep_cargos_funcoes/schedules.py index b7ed10811..5c93e941e 100644 --- a/pipelines/datasets/br_mp_pep_cargos_funcoes/schedules.py +++ b/pipelines/datasets/br_mp_pep_cargos_funcoes/schedules.py @@ -4,8 +4,10 @@ """ from datetime import datetime + from prefect.schedules import Schedule from prefect.schedules.clocks import CronClock + from pipelines.constants import constants every_month = Schedule( diff --git a/pipelines/datasets/br_mp_pep_cargos_funcoes/tasks.py b/pipelines/datasets/br_mp_pep_cargos_funcoes/tasks.py index b5a93c810..64f32dbc6 100644 --- a/pipelines/datasets/br_mp_pep_cargos_funcoes/tasks.py +++ b/pipelines/datasets/br_mp_pep_cargos_funcoes/tasks.py @@ -3,26 +3,26 @@ Tasks for br_mp_pep_cargos_funcoes """ +import datetime +import io import os import time -import requests import zipfile -import io + import pandas as pd -import datetime +import requests +from prefect import task from selenium import webdriver -from selenium.webdriver.common.by import By from selenium.common.exceptions import ElementNotInteractableException +from selenium.webdriver.common.by import By -from prefect import task - -from pipelines.utils.utils import log, to_partitions, extract_last_date from pipelines.datasets.br_mp_pep_cargos_funcoes.constants import constants from pipelines.datasets.br_mp_pep_cargos_funcoes.utils import ( - wait_file_download, - move_from_tmp_dir, get_normalized_values_by_col, + move_from_tmp_dir, + wait_file_download, ) +from pipelines.utils.utils import extract_last_date, log, to_partitions @task diff --git a/pipelines/datasets/br_mp_pep_cargos_funcoes/utils.py b/pipelines/datasets/br_mp_pep_cargos_funcoes/utils.py index d2d695a99..6b54a0986 100644 --- a/pipelines/datasets/br_mp_pep_cargos_funcoes/utils.py +++ b/pipelines/datasets/br_mp_pep_cargos_funcoes/utils.py @@ -5,6 +5,7 @@ import os import time + from pipelines.datasets.br_mp_pep_cargos_funcoes.constants import constants from pipelines.utils.utils import log diff --git a/pipelines/datasets/br_ms_cnes/flows.py b/pipelines/datasets/br_ms_cnes/flows.py index c65d8e69a..ba001cf98 100644 --- a/pipelines/datasets/br_ms_cnes/flows.py +++ b/pipelines/datasets/br_ms_cnes/flows.py @@ -6,43 +6,42 @@ from datetime import timedelta from prefect import Parameter, case -from pipelines.constants import constants from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from pipelines.constants import constants from pipelines.datasets.br_ms_cnes.constants import constants as br_ms_cnes_constants +from pipelines.datasets.br_ms_cnes.schedules import ( + schedule_br_ms_cnes_dados_complementares, + schedule_br_ms_cnes_equipamento, + schedule_br_ms_cnes_equipe, + schedule_br_ms_cnes_estabelecimento, + schedule_br_ms_cnes_estabelecimento_ensino, + schedule_br_ms_cnes_estabelecimento_filantropico, + schedule_br_ms_cnes_gestao_metas, + schedule_br_ms_cnes_habilitacao, + schedule_br_ms_cnes_incentivos, + schedule_br_ms_cnes_leito, + schedule_br_ms_cnes_profissional, + schedule_br_ms_cnes_regra_contratual, + schedule_br_ms_cnes_servico_especializado, +) 
from pipelines.datasets.br_ms_cnes.tasks import ( access_ftp_donwload_files, - read_dbc_save_csv, - is_empty, check_files_to_parse, + is_empty, + read_dbc_save_csv, ) from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.flows import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, log_task, -) - -from pipelines.utils.metadata.flows import update_django_metadata -from pipelines.datasets.br_ms_cnes.schedules import ( - schedule_br_ms_cnes_estabelecimento, - schedule_br_ms_cnes_profissional, - schedule_br_ms_cnes_equipe, - schedule_br_ms_cnes_leito, - schedule_br_ms_cnes_equipamento, - schedule_br_ms_cnes_estabelecimento_ensino, - schedule_br_ms_cnes_dados_complementares, - schedule_br_ms_cnes_estabelecimento_filantropico, - schedule_br_ms_cnes_gestao_metas, - schedule_br_ms_cnes_habilitacao, - schedule_br_ms_cnes_incentivos, - schedule_br_ms_cnes_regra_contratual, - schedule_br_ms_cnes_servico_especializado, + rename_current_flow_run_dataset_table, ) with Flow( diff --git a/pipelines/datasets/br_ms_cnes/schedules.py b/pipelines/datasets/br_ms_cnes/schedules.py index e18d9d2ad..ae234d1ae 100644 --- a/pipelines/datasets/br_ms_cnes/schedules.py +++ b/pipelines/datasets/br_ms_cnes/schedules.py @@ -3,11 +3,12 @@ Schedules for br_ms_cnes """ -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock from datetime import datetime -from pipelines.constants import constants +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock + +from pipelines.constants import constants schedule_br_ms_cnes_estabelecimento = Schedule( clocks=[ diff --git a/pipelines/datasets/br_ms_cnes/tasks.py b/pipelines/datasets/br_ms_cnes/tasks.py index 3ca592a83..2c3e286d7 100644 --- a/pipelines/datasets/br_ms_cnes/tasks.py +++ b/pipelines/datasets/br_ms_cnes/tasks.py @@ -4,29 +4,28 @@ """ -from prefect import task +import os from datetime import timedelta -from pipelines.utils.utils import log -from pipelines.constants import constants import pandas as pd -import wget -import os -from rpy2.robjects.packages import importr -import rpy2.robjects.packages as rpackages import rpy2.robjects as ro +import rpy2.robjects.packages as rpackages +import wget +from prefect import task from rpy2.robjects import pandas2ri +from rpy2.robjects.packages import importr - +from pipelines.constants import constants from pipelines.datasets.br_ms_cnes.constants import constants as cnes_constants from pipelines.datasets.br_ms_cnes.utils import ( - list_all_cnes_dbc_files, - year_month_sigla_uf_parser, - pre_cleaning_to_utf8, check_and_create_column, - if_column_exist_delete, extract_last_date, + if_column_exist_delete, + list_all_cnes_dbc_files, + pre_cleaning_to_utf8, + year_month_sigla_uf_parser, ) +from pipelines.utils.utils import log @task diff --git a/pipelines/datasets/br_ms_cnes/utils.py b/pipelines/datasets/br_ms_cnes/utils.py index 277cb3a0f..a07d3da9d 100644 --- a/pipelines/datasets/br_ms_cnes/utils.py +++ b/pipelines/datasets/br_ms_cnes/utils.py @@ -4,8 +4,10 @@ """ from ftplib import FTP -import pandas as pd + import basedosdados as bd +import pandas as pd + from pipelines.utils.utils import log diff --git 
a/pipelines/datasets/br_ons_avaliacao_operacao/flows.py b/pipelines/datasets/br_ons_avaliacao_operacao/flows.py index 7c70f267c..2c21e96ff 100644 --- a/pipelines/datasets/br_ons_avaliacao_operacao/flows.py +++ b/pipelines/datasets/br_ons_avaliacao_operacao/flows.py @@ -11,32 +11,27 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.datasets.br_ons_avaliacao_operacao.tasks import ( - download_data, - wrang_data, -) from pipelines.datasets.br_ons_avaliacao_operacao.constants import ( constants as ons_constants, ) +from pipelines.datasets.br_ons_avaliacao_operacao.schedules import ( + schedule_br_ons_avaliacao_operacao_energia_armazenada_reservatorio, + schedule_br_ons_avaliacao_operacao_energia_natural_afluente, + schedule_br_ons_avaliacao_operacao_geracao_termica_motivo_despacho, + schedule_br_ons_avaliacao_operacao_geracao_usina, + schedule_br_ons_avaliacao_operacao_reservatorio, +) +from pipelines.datasets.br_ons_avaliacao_operacao.tasks import download_data, wrang_data from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( - rename_current_flow_run_dataset_table, - get_current_flow_labels, create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, update_django_metadata, ) -from pipelines.datasets.br_ons_avaliacao_operacao.schedules import ( - schedule_br_ons_avaliacao_operacao_reservatorio, - schedule_br_ons_avaliacao_operacao_geracao_usina, - schedule_br_ons_avaliacao_operacao_geracao_termica_motivo_despacho, - schedule_br_ons_avaliacao_operacao_energia_natural_afluente, - schedule_br_ons_avaliacao_operacao_energia_armazenada_reservatorio, -) - - with Flow( name="br_ons_avaliacao_operacao.reservatorio", code_owners=["Gabriel Pisa"] ) as br_ons_avaliacao_operacao_reservatorio: diff --git a/pipelines/datasets/br_ons_avaliacao_operacao/schedules.py b/pipelines/datasets/br_ons_avaliacao_operacao/schedules.py index e510d0529..1c7020b3f 100644 --- a/pipelines/datasets/br_ons_avaliacao_operacao/schedules.py +++ b/pipelines/datasets/br_ons_avaliacao_operacao/schedules.py @@ -4,10 +4,11 @@ """ -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock from datetime import datetime +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock + from pipelines.constants import constants schedule_br_ons_avaliacao_operacao_reservatorio = Schedule( diff --git a/pipelines/datasets/br_ons_avaliacao_operacao/tasks.py b/pipelines/datasets/br_ons_avaliacao_operacao/tasks.py index 3d5ac5b8d..de787aa85 100644 --- a/pipelines/datasets/br_ons_avaliacao_operacao/tasks.py +++ b/pipelines/datasets/br_ons_avaliacao_operacao/tasks.py @@ -3,25 +3,25 @@ Tasks for br_ons_avaliacao_operacao """ import os -import pandas as pd +import pandas as pd from prefect import task -from pipelines.utils.utils import ( - log, - to_partitions, -) + from pipelines.datasets.br_ons_avaliacao_operacao.constants import constants from pipelines.datasets.br_ons_avaliacao_operacao.utils import ( - create_paths, - crawler_ons, - download_data as dw, change_columns_name, - remove_latin1_accents_from_df, + crawler_ons, + create_paths, +) +from pipelines.datasets.br_ons_avaliacao_operacao.utils import download_data as dw +from 
pipelines.datasets.br_ons_avaliacao_operacao.utils import ( order_df, process_date_column, process_datetime_column, remove_decimal, + remove_latin1_accents_from_df, ) +from pipelines.utils.utils import log, to_partitions @task diff --git a/pipelines/datasets/br_ons_avaliacao_operacao/utils.py b/pipelines/datasets/br_ons_avaliacao_operacao/utils.py index 2f3e614f6..3ea6894d8 100644 --- a/pipelines/datasets/br_ons_avaliacao_operacao/utils.py +++ b/pipelines/datasets/br_ons_avaliacao_operacao/utils.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- -import wget -import requests -from bs4 import BeautifulSoup import os -import pandas as pd -from io import StringIO -from typing import List import time as tm import unicodedata +from io import StringIO +from typing import List + +import pandas as pd +import requests +import wget +from bs4 import BeautifulSoup def crawler_ons( diff --git a/pipelines/datasets/br_ons_estimativa_custos/flows.py b/pipelines/datasets/br_ons_estimativa_custos/flows.py index 9fcf1d64b..dc3b3c942 100644 --- a/pipelines/datasets/br_ons_estimativa_custos/flows.py +++ b/pipelines/datasets/br_ons_estimativa_custos/flows.py @@ -11,31 +11,26 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.datasets.br_ons_estimativa_custos.tasks import ( - download_data, - wrang_data, -) from pipelines.datasets.br_ons_estimativa_custos.constants import ( constants as ons_constants, ) +from pipelines.datasets.br_ons_estimativa_custos.schedules import ( + schedule_br_ons_estimativa_custos_balanco_energia_subsistemas, + schedule_br_ons_estimativa_custos_balanco_energia_subsistemas_dessem, + schedule_br_ons_estimativa_custos_custo_marginal_operacao_semanal, + schedule_br_ons_estimativa_custos_custo_marginal_operacao_semi_horario, +) +from pipelines.datasets.br_ons_estimativa_custos.tasks import download_data, wrang_data from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( - rename_current_flow_run_dataset_table, - get_current_flow_labels, create_table_and_upload_to_gcs, + get_current_flow_labels, + rename_current_flow_run_dataset_table, update_django_metadata, ) -from pipelines.datasets.br_ons_estimativa_custos.schedules import ( - schedule_br_ons_estimativa_custos_custo_marginal_operacao_semi_horario, - schedule_br_ons_estimativa_custos_custo_marginal_operacao_semanal, - schedule_br_ons_estimativa_custos_balanco_energia_subsistemas, - schedule_br_ons_estimativa_custos_balanco_energia_subsistemas_dessem, -) - - with Flow( name="br_ons_estimativa_custos.custo_marginal_operacao_semi_horario", code_owners=["Gabriel Pisa"], diff --git a/pipelines/datasets/br_ons_estimativa_custos/schedules.py b/pipelines/datasets/br_ons_estimativa_custos/schedules.py index 429e567d6..6f786bd4f 100644 --- a/pipelines/datasets/br_ons_estimativa_custos/schedules.py +++ b/pipelines/datasets/br_ons_estimativa_custos/schedules.py @@ -3,7 +3,7 @@ Schedules for br_ons_estimativa_custos """ -from prefect.schedules import Schedule, filters, adjustments +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import CronClock from pipelines.constants import constants diff --git a/pipelines/datasets/br_ons_estimativa_custos/tasks.py b/pipelines/datasets/br_ons_estimativa_custos/tasks.py index 399ca661f..00e91d78f 100644 --- 
a/pipelines/datasets/br_ons_estimativa_custos/tasks.py +++ b/pipelines/datasets/br_ons_estimativa_custos/tasks.py @@ -3,25 +3,26 @@ Tasks for br_ons_avaliacao_operacao """ import os -import pandas as pd import time as tm from datetime import datetime +import pandas as pd from prefect import task -from pipelines.utils.utils import ( - log, -) + from pipelines.datasets.br_ons_estimativa_custos.constants import constants from pipelines.datasets.br_ons_estimativa_custos.utils import ( - create_paths, - crawler_ons, - download_data as dw, change_columns_name, - remove_latin1_accents_from_df, + crawler_ons, + create_paths, +) +from pipelines.datasets.br_ons_estimativa_custos.utils import download_data as dw +from pipelines.datasets.br_ons_estimativa_custos.utils import ( order_df, process_date_column, process_datetime_column, + remove_latin1_accents_from_df, ) +from pipelines.utils.utils import log @task diff --git a/pipelines/datasets/br_ons_estimativa_custos/utils.py b/pipelines/datasets/br_ons_estimativa_custos/utils.py index 1782dd92a..eb928b9cf 100644 --- a/pipelines/datasets/br_ons_estimativa_custos/utils.py +++ b/pipelines/datasets/br_ons_estimativa_custos/utils.py @@ -3,15 +3,16 @@ General purpose functions for the br_ons_estimativa_custos project """ -import wget -import requests -from bs4 import BeautifulSoup import os -import pandas as pd -from io import StringIO -from typing import List import time as tm import unicodedata +from io import StringIO +from typing import List + +import pandas as pd +import requests +import wget +from bs4 import BeautifulSoup def crawler_ons( diff --git a/pipelines/datasets/br_poder360_pesquisas/flows.py b/pipelines/datasets/br_poder360_pesquisas/flows.py index 00600eb8e..9934fc0d2 100644 --- a/pipelines/datasets/br_poder360_pesquisas/flows.py +++ b/pipelines/datasets/br_poder360_pesquisas/flows.py @@ -5,24 +5,24 @@ from datetime import timedelta +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants +from pipelines.datasets.br_poder360_pesquisas.schedules import every_monday_thursday from pipelines.datasets.br_poder360_pesquisas.tasks import crawler +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, + get_current_flow_labels, get_temporal_coverage, - update_metadata, rename_current_flow_run_dataset_table, - get_current_flow_labels, + update_metadata, ) -from pipelines.datasets.br_poder360_pesquisas.schedules import every_monday_thursday # pylint: disable=C0103 with Flow( diff --git a/pipelines/datasets/br_poder360_pesquisas/schedules.py b/pipelines/datasets/br_poder360_pesquisas/schedules.py index ab9e49287..37a6dc1c0 100644 --- a/pipelines/datasets/br_poder360_pesquisas/schedules.py +++ b/pipelines/datasets/br_poder360_pesquisas/schedules.py @@ -10,7 +10,6 @@ from pipelines.constants import constants - every_monday_thursday = Schedule( clocks=[ IntervalClock( diff --git a/pipelines/datasets/br_poder360_pesquisas/tasks.py b/pipelines/datasets/br_poder360_pesquisas/tasks.py index 
15e5edc55..b75ab7e49 100644 --- a/pipelines/datasets/br_poder360_pesquisas/tasks.py +++ b/pipelines/datasets/br_poder360_pesquisas/tasks.py @@ -2,14 +2,15 @@ """ Tasks for br_poder360_pesquisas """ -from json.decoder import JSONDecodeError -from datetime import timedelta import os +from datetime import timedelta +from json.decoder import JSONDecodeError -from prefect import task -import requests import pandas as pd +import requests +from prefect import task from tqdm import tqdm + from pipelines.constants import constants diff --git a/pipelines/datasets/br_rf_cafir/flows.py b/pipelines/datasets/br_rf_cafir/flows.py index e364acf8d..0b5a682e0 100644 --- a/pipelines/datasets/br_rf_cafir/flows.py +++ b/pipelines/datasets/br_rf_cafir/flows.py @@ -6,32 +6,30 @@ from datetime import timedelta from prefect import Parameter, case -from pipelines.constants import constants from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run +from pipelines.constants import constants from pipelines.datasets.br_rf_cafir.constants import constants as br_rf_cafir_constants +from pipelines.datasets.br_rf_cafir.schedules import schedule_br_rf_cafir_imoveis_rurais from pipelines.datasets.br_rf_cafir.tasks import ( - parse_files_parse_date, - parse_data, check_if_bq_data_is_outdated, convert_datetime_to_string, + parse_data, + parse_files_parse_date, ) - from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, log_task, + rename_current_flow_run_dataset_table, ) -from pipelines.datasets.br_rf_cafir.schedules import schedule_br_rf_cafir_imoveis_rurais - with Flow( name="br_rf_cafir.imoveis_rurais", code_owners=["Gabriel Pisa"] ) as br_rf_cafir_imoveis_rurais: diff --git a/pipelines/datasets/br_rf_cafir/schedules.py b/pipelines/datasets/br_rf_cafir/schedules.py index 19d2bb01a..531177629 100644 --- a/pipelines/datasets/br_rf_cafir/schedules.py +++ b/pipelines/datasets/br_rf_cafir/schedules.py @@ -3,11 +3,12 @@ Schedules for br_rf_cafir """ -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock from datetime import datetime -from pipelines.constants import constants +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock + +from pipelines.constants import constants schedule_br_rf_cafir_imoveis_rurais = Schedule( clocks=[ diff --git a/pipelines/datasets/br_rf_cafir/tasks.py b/pipelines/datasets/br_rf_cafir/tasks.py index b2565d967..0e8e986c7 100644 --- a/pipelines/datasets/br_rf_cafir/tasks.py +++ b/pipelines/datasets/br_rf_cafir/tasks.py @@ -4,23 +4,24 @@ """ -from prefect import task -from pipelines.utils.utils import log -from pipelines.constants import constants -from datetime import datetime import os +from datetime import datetime + import pandas as pd +from prefect import task +from pipelines.constants import constants from pipelines.datasets.br_rf_cafir.constants import constants as br_rf_cafir_constants from pipelines.datasets.br_rf_cafir.utils import ( - parse_date_parse_files, download_csv_files, - remove_accent, + extract_last_date, + parse_date_parse_files, preserve_zeros, + 
remove_accent, remove_non_ascii_from_df, strip_string, - extract_last_date, ) +from pipelines.utils.utils import log @task diff --git a/pipelines/datasets/br_rf_cafir/utils.py b/pipelines/datasets/br_rf_cafir/utils.py index 8b21db0ee..397886be9 100644 --- a/pipelines/datasets/br_rf_cafir/utils.py +++ b/pipelines/datasets/br_rf_cafir/utils.py @@ -3,14 +3,16 @@ General purpose functions for the br_ms_cnes project """ -from bs4 import BeautifulSoup -import requests -from datetime import datetime import os -from pipelines.utils.utils import log import unicodedata -import pandas as pd +from datetime import datetime + import basedosdados as bd +import pandas as pd +import requests +from bs4 import BeautifulSoup + +from pipelines.utils.utils import log # função para extrair datas # valor usado para o check de atualização do site além de ser usado para update do coverage diff --git a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/flows.py b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/flows.py index 34a31534a..ecdb83d03 100644 --- a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/flows.py +++ b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/flows.py @@ -3,46 +3,41 @@ Flows for br_rj_isp_estatisticas_seguranca. """ from datetime import timedelta + +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, -) +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + from pipelines.constants import constants -from pipelines.utils.tasks import update_django_metadata -from pipelines.utils.constants import constants as utils_constants +from pipelines.datasets.br_rj_isp_estatisticas_seguranca.constants import ( + constants as isp_constants, +) +from pipelines.datasets.br_rj_isp_estatisticas_seguranca.schedules import ( + every_month_armas_apreendidas_mensal, + every_month_evolucao_mensal_cisp, + every_month_evolucao_mensal_municipio, + every_month_evolucao_mensal_uf, + every_month_evolucao_policial_morto_servico_mensal, + every_month_feminicidio_mensal_cisp, + every_month_taxa_evolucao_mensal_municipio, + every_month_taxa_evolucao_mensal_uf, +) from pipelines.datasets.br_rj_isp_estatisticas_seguranca.tasks import ( - download_files, clean_data, + download_files, get_today_date, ) - +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_django_metadata, ) -from pipelines.datasets.br_rj_isp_estatisticas_seguranca.constants import ( - constants as isp_constants, -) - -from pipelines.datasets.br_rj_isp_estatisticas_seguranca.schedules import ( - every_month_evolucao_mensal_cisp, - every_month_taxa_evolucao_mensal_uf, - every_month_taxa_evolucao_mensal_municipio, - every_month_feminicidio_mensal_cisp, - every_month_evolucao_policial_morto_servico_mensal, - every_month_armas_apreendidas_mensal, - every_month_evolucao_mensal_municipio, - every_month_evolucao_mensal_uf, -) - - # ! 
Evolucao_mensal_cisp with Flow( name="br_rj_isp_estatisticas_seguranca.evolucao_mensal_cisp", diff --git a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/schedules.py b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/schedules.py index f3685b995..b52e281a4 100644 --- a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/schedules.py +++ b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/schedules.py @@ -3,9 +3,11 @@ Schedules for br_rj_isp_estatisticas_seguranca """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants # ! Schedules tabela evolucao_mensal_cisp diff --git a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/tasks.py b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/tasks.py index 7dced300b..629691445 100644 --- a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/tasks.py +++ b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/tasks.py @@ -1,22 +1,21 @@ # -*- coding: utf-8 -*- -import pandas as pd import os -import requests from datetime import datetime, timedelta + +import pandas as pd +import requests from prefect import task -from pipelines.utils.utils import ( - log, +from pipelines.constants import constants +from pipelines.datasets.br_rj_isp_estatisticas_seguranca.constants import ( + constants as isp_constants, ) from pipelines.datasets.br_rj_isp_estatisticas_seguranca.utils import ( change_columns_name, - create_columns_order, check_tipo_fase, + create_columns_order, ) -from pipelines.datasets.br_rj_isp_estatisticas_seguranca.constants import ( - constants as isp_constants, -) -from pipelines.constants import constants +from pipelines.utils.utils import log @task( diff --git a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/utils.py b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/utils.py index 09954b35c..3332cbcbc 100644 --- a/pipelines/datasets/br_rj_isp_estatisticas_seguranca/utils.py +++ b/pipelines/datasets/br_rj_isp_estatisticas_seguranca/utils.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -import pandas as pd from io import StringIO +from typing import Dict, List + +import pandas as pd import requests -from typing import List -from typing import Dict # build a dict that maps a table name to a architectura and # another dict that maps an original table name to a diff --git a/pipelines/datasets/br_sp_saopaulo_dieese_icv/flows.py b/pipelines/datasets/br_sp_saopaulo_dieese_icv/flows.py index f61b42310..b14663eec 100644 --- a/pipelines/datasets/br_sp_saopaulo_dieese_icv/flows.py +++ b/pipelines/datasets/br_sp_saopaulo_dieese_icv/flows.py @@ -6,25 +6,23 @@ from datetime import timedelta +from prefect import Parameter, case from prefect.run_configs import KubernetesRun from prefect.storage import GCS -from prefect import Parameter, case from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.datasets.br_sp_saopaulo_dieese_icv.schedules import every_month +from pipelines.datasets.br_sp_saopaulo_dieese_icv.tasks import clean_dieese_icv from pipelines.utils.constants import constants as utils_constants -from pipelines.datasets.br_sp_saopaulo_dieese_icv.tasks import ( - clean_dieese_icv, -) from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from 
pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - update_metadata, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, + update_metadata, ) -from pipelines.datasets.br_sp_saopaulo_dieese_icv.schedules import every_month with Flow( name="br_sp_saopaulo_dieese_icv.mes", code_owners=["crislanealves"] diff --git a/pipelines/datasets/br_sp_saopaulo_dieese_icv/schedules.py b/pipelines/datasets/br_sp_saopaulo_dieese_icv/schedules.py index db5befc33..35314bffe 100644 --- a/pipelines/datasets/br_sp_saopaulo_dieese_icv/schedules.py +++ b/pipelines/datasets/br_sp_saopaulo_dieese_icv/schedules.py @@ -3,9 +3,11 @@ Schedules for br_sp_saopaulo_dieese_icv """ -from datetime import timedelta, datetime -from prefect.schedules import Schedule, filters, adjustments +from datetime import datetime, timedelta + +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_month = Schedule( diff --git a/pipelines/datasets/br_sp_saopaulo_dieese_icv/tasks.py b/pipelines/datasets/br_sp_saopaulo_dieese_icv/tasks.py index 02fa4a3a6..ffcf3e602 100644 --- a/pipelines/datasets/br_sp_saopaulo_dieese_icv/tasks.py +++ b/pipelines/datasets/br_sp_saopaulo_dieese_icv/tasks.py @@ -3,8 +3,9 @@ Tasks for br_sp_saopaulo_dieese_icv """ import os -import pandas as pd + import ipeadatapy as idpy +import pandas as pd from prefect import task diff --git a/pipelines/datasets/br_tse_eleicoes/flows.py b/pipelines/datasets/br_tse_eleicoes/flows.py index 09774bcf3..979d1126a 100644 --- a/pipelines/datasets/br_tse_eleicoes/flows.py +++ b/pipelines/datasets/br_tse_eleicoes/flows.py @@ -6,34 +6,32 @@ from datetime import timedelta from prefect import Parameter, case -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.decorators import Flow +from pipelines.datasets.br_tse_eleicoes.constants import constants as tse_constants from pipelines.datasets.br_tse_eleicoes.tasks import ( - download_before22, - get_csv_files, - build_candidatos, - clean_candidatos22, build_bens_candidato, + build_candidatos, clean_bens22, + clean_candidatos22, clean_despesa22, clean_receita22, + download_before22, + get_csv_files, ) - +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, + get_current_flow_labels, rename_current_flow_run_dataset_table, update_metadata, - get_current_flow_labels, ) -from pipelines.datasets.br_tse_eleicoes.constants import constants as tse_constants - with Flow( name="br_tse_eleicoes.candidatos", code_owners=["lucas_cr"] ) as br_tse_candidatos: diff --git a/pipelines/datasets/br_tse_eleicoes/schedules.py b/pipelines/datasets/br_tse_eleicoes/schedules.py index fa91dc140..804f16eb8 100644 --- a/pipelines/datasets/br_tse_eleicoes/schedules.py +++ b/pipelines/datasets/br_tse_eleicoes/schedules.py @@ -3,9 +3,11 @@ Schedules for br_tse_eleicoes """ -from datetime import timedelta, 
datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule, filters from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants schedule_bens = Schedule( diff --git a/pipelines/datasets/br_tse_eleicoes/tasks.py b/pipelines/datasets/br_tse_eleicoes/tasks.py index a1f46aa3c..6ed34bfa5 100644 --- a/pipelines/datasets/br_tse_eleicoes/tasks.py +++ b/pipelines/datasets/br_tse_eleicoes/tasks.py @@ -2,29 +2,31 @@ """ Tasks for br_tse_eleicoes """ +import os +import re +import zipfile + # pylint: disable=invalid-name,line-too-long from datetime import timedelta -import zipfile -import os from glob import glob from itertools import product -import re -import requests -from unidecode import unidecode -from tqdm import tqdm import numpy as np import pandas as pd +import requests from prefect import task +from tqdm import tqdm +from unidecode import unidecode + from pipelines.constants import constants -from pipelines.utils.utils import log from pipelines.datasets.br_tse_eleicoes.utils import ( - get_id_candidato_bd, + clean_digit_id, get_blobs_from_raw, - normalize_dahis, get_data_from_prod, - clean_digit_id, + get_id_candidato_bd, + normalize_dahis, ) +from pipelines.utils.utils import log @task( diff --git a/pipelines/datasets/cross_update/flows.py b/pipelines/datasets/cross_update/flows.py index f89b2dae6..532478b24 100644 --- a/pipelines/datasets/cross_update/flows.py +++ b/pipelines/datasets/cross_update/flows.py @@ -4,7 +4,7 @@ """ # pylint: disable=invalid-name,line-too-long -from prefect import Parameter, unmapped, case +from prefect import Parameter, case, unmapped from prefect.run_configs import KubernetesRun from prefect.storage import GCS from prefect.tasks.prefect import create_flow_run, wait_for_flow_run @@ -12,10 +12,10 @@ from pipelines.constants import constants from pipelines.datasets.cross_update.schedules import schedule_nrows from pipelines.datasets.cross_update.tasks import ( - datasearch_json, crawler_tables, - update_nrows, + datasearch_json, rename_blobs, + update_nrows, ) from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow diff --git a/pipelines/datasets/cross_update/schedules.py b/pipelines/datasets/cross_update/schedules.py index 6d81e68d5..78374f2b5 100644 --- a/pipelines/datasets/cross_update/schedules.py +++ b/pipelines/datasets/cross_update/schedules.py @@ -3,7 +3,7 @@ Schedules for br_tse_eleicoes """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock diff --git a/pipelines/datasets/cross_update/tasks.py b/pipelines/datasets/cross_update/tasks.py index 09e755c93..ec63c54e1 100644 --- a/pipelines/datasets/cross_update/tasks.py +++ b/pipelines/datasets/cross_update/tasks.py @@ -7,7 +7,7 @@ from datetime import timedelta # pylint: disable=invalid-name, too-many-locals -from typing import List, Dict, Tuple +from typing import Dict, List, Tuple import basedosdados as bd import ruamel.yaml as ryaml diff --git a/pipelines/datasets/delete_flows/flows.py b/pipelines/datasets/delete_flows/flows.py index 9ff375176..b3a2dd022 100644 --- a/pipelines/datasets/delete_flows/flows.py +++ b/pipelines/datasets/delete_flows/flows.py @@ -6,13 +6,13 @@ from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from pipelines.constants import constants from pipelines.datasets.delete_flows.schedules import daily_at_3am from 
pipelines.datasets.delete_flows.tasks import ( delete_flow_run, get_old_flows_runs, get_prefect_client, ) -from pipelines.constants import constants from pipelines.utils.decorators import Flow with Flow( diff --git a/pipelines/datasets/delete_flows/schedules.py b/pipelines/datasets/delete_flows/schedules.py index 607acf438..84a60e816 100644 --- a/pipelines/datasets/delete_flows/schedules.py +++ b/pipelines/datasets/delete_flows/schedules.py @@ -3,15 +3,14 @@ Schedules for the daily cleanup flow. """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta +import pytz from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock -import pytz from pipelines.constants import constants - daily_at_3am = Schedule( clocks=[ IntervalClock( diff --git a/pipelines/datasets/delete_flows/tasks.py b/pipelines/datasets/delete_flows/tasks.py index 8aa52ba75..66c0c699e 100644 --- a/pipelines/datasets/delete_flows/tasks.py +++ b/pipelines/datasets/delete_flows/tasks.py @@ -3,9 +3,11 @@ Tasks for delete_flows """ from typing import Dict, List + +import pendulum from prefect import task from prefect.client import Client -import pendulum + from pipelines.utils.utils import log diff --git a/pipelines/datasets/fundacao_lemann/flows.py b/pipelines/datasets/fundacao_lemann/flows.py index 876cc5688..da26a3fed 100644 --- a/pipelines/datasets/fundacao_lemann/flows.py +++ b/pipelines/datasets/fundacao_lemann/flows.py @@ -11,14 +11,12 @@ from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from pipelines.constants import constants +from pipelines.datasets.fundacao_lemann.schedules import every_year from pipelines.utils.constants import constants as utils_constants from pipelines.utils.decorators import Flow from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants - from pipelines.utils.tasks import get_current_flow_labels -from pipelines.datasets.fundacao_lemann.schedules import every_year - with Flow( name="fundacao_lemann.ano_escola_serie_educacao_aprendizagem_adequada", code_owners=["crislanealves"], diff --git a/pipelines/datasets/fundacao_lemann/schedules.py b/pipelines/datasets/fundacao_lemann/schedules.py index 7321598c8..c80812165 100644 --- a/pipelines/datasets/fundacao_lemann/schedules.py +++ b/pipelines/datasets/fundacao_lemann/schedules.py @@ -3,9 +3,11 @@ Schedules for fundacao_lemann """ -from datetime import timedelta, datetime -from prefect.schedules import Schedule, filters, adjustments +from datetime import datetime, timedelta + +from prefect.schedules import Schedule, adjustments, filters from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_year = Schedule( diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/constants.py b/pipelines/datasets/mundo_transfermarkt_competicoes/constants.py index 59fdc6b2e..2ef29374b 100644 --- a/pipelines/datasets/mundo_transfermarkt_competicoes/constants.py +++ b/pipelines/datasets/mundo_transfermarkt_competicoes/constants.py @@ -4,8 +4,8 @@ """ ############################################################################### -from enum import Enum import datetime +from enum import Enum class constants(Enum): # pylint: disable=c0103 diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/decorators.py b/pipelines/datasets/mundo_transfermarkt_competicoes/decorators.py index 9d9962076..d06911a77 100644 --- a/pipelines/datasets/mundo_transfermarkt_competicoes/decorators.py +++ 
b/pipelines/datasets/mundo_transfermarkt_competicoes/decorators.py @@ -3,8 +3,9 @@ Custom decorators for pipelines. """ import asyncio -from bs4 import BeautifulSoup + import requests +from bs4 import BeautifulSoup def retry(content_function): diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/flows.py b/pipelines/datasets/mundo_transfermarkt_competicoes/flows.py index fc6815d2a..4bba77dd6 100644 --- a/pipelines/datasets/mundo_transfermarkt_competicoes/flows.py +++ b/pipelines/datasets/mundo_transfermarkt_competicoes/flows.py @@ -2,41 +2,41 @@ """ Flows for mundo_transfermarkt_competicoes """ +from datetime import timedelta + +from prefect import Parameter, case +from prefect.run_configs import KubernetesRun +from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run + +from pipelines.constants import constants + ############################################################################### from pipelines.datasets.mundo_transfermarkt_competicoes.constants import ( constants as mundo_constants, ) +from pipelines.datasets.mundo_transfermarkt_competicoes.schedules import ( + every_week, + every_week_copa, +) from pipelines.datasets.mundo_transfermarkt_competicoes.tasks import ( - make_partitions, - get_max_data, execucao_coleta_sync, + get_max_data, + make_partitions, ) from pipelines.datasets.mundo_transfermarkt_competicoes.utils import ( execucao_coleta, execucao_coleta_copa, ) -from pipelines.datasets.mundo_transfermarkt_competicoes.schedules import ( - every_week, - every_week_copa, -) +from pipelines.utils.constants import constants as utils_constants +from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants +from pipelines.utils.metadata.tasks import update_django_metadata from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - rename_current_flow_run_dataset_table, get_current_flow_labels, + rename_current_flow_run_dataset_table, ) -from prefect.run_configs import KubernetesRun -from prefect.storage import GCS -from pipelines.constants import constants -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.decorators import Flow -from prefect import Parameter, case -from prefect.tasks.prefect import ( - create_flow_run, - wait_for_flow_run, -) -from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.metadata.tasks import update_django_metadata -from datetime import timedelta with Flow( name="mundo_transfermarkt_competicoes.brasileirao_serie_a", diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/schedules.py b/pipelines/datasets/mundo_transfermarkt_competicoes/schedules.py index cc4e740a4..21a923e16 100644 --- a/pipelines/datasets/mundo_transfermarkt_competicoes/schedules.py +++ b/pipelines/datasets/mundo_transfermarkt_competicoes/schedules.py @@ -5,11 +5,12 @@ ############################################################################### -from prefect.schedules.clocks import CronClock from datetime import datetime + from prefect.schedules import Schedule -from pipelines.constants import constants +from prefect.schedules.clocks import CronClock +from pipelines.constants import constants every_week = Schedule( clocks=[ diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/tasks.py b/pipelines/datasets/mundo_transfermarkt_competicoes/tasks.py index e968be59a..6e30293b2 100644 --- 
a/pipelines/datasets/mundo_transfermarkt_competicoes/tasks.py +++ b/pipelines/datasets/mundo_transfermarkt_competicoes/tasks.py @@ -3,19 +3,21 @@ Tasks for mundo_transfermarkt_competicoes """ +import asyncio + +import numpy as np +import pandas as pd +from prefect import task + ############################################################################### from pipelines.datasets.mundo_transfermarkt_competicoes.constants import ( constants as mundo_constants, ) from pipelines.datasets.mundo_transfermarkt_competicoes.utils import ( - execucao_coleta_copa, execucao_coleta, + execucao_coleta_copa, ) from pipelines.utils.utils import log, to_partitions -from prefect import task -import numpy as np -import pandas as pd -import asyncio @task diff --git a/pipelines/datasets/mundo_transfermarkt_competicoes/utils.py b/pipelines/datasets/mundo_transfermarkt_competicoes/utils.py index 8e6f79ce0..1f7a0d552 100644 --- a/pipelines/datasets/mundo_transfermarkt_competicoes/utils.py +++ b/pipelines/datasets/mundo_transfermarkt_competicoes/utils.py @@ -5,15 +5,17 @@ ############################################################################### import re -from bs4 import BeautifulSoup -import requests + import numpy as np import pandas as pd +import requests +from bs4 import BeautifulSoup + from pipelines.datasets.mundo_transfermarkt_competicoes.constants import ( constants as mundo_constants, ) -from pipelines.utils.utils import log from pipelines.datasets.mundo_transfermarkt_competicoes.decorators import retry +from pipelines.utils.utils import log @retry diff --git a/pipelines/datasets/test_pipeline/flows.py b/pipelines/datasets/test_pipeline/flows.py index 99e304ae1..1ca7f97af 100644 --- a/pipelines/datasets/test_pipeline/flows.py +++ b/pipelines/datasets/test_pipeline/flows.py @@ -64,8 +64,8 @@ from pipelines.constants import constants from pipelines.datasets.test_pipeline.tasks import ( - get_random_expression, dataframe_to_csv, + get_random_expression, upload_to_gcs, ) from pipelines.utils.decorators import Flow diff --git a/pipelines/datasets/test_pipeline/schedules.py b/pipelines/datasets/test_pipeline/schedules.py index c03b80428..39dd88ead 100644 --- a/pipelines/datasets/test_pipeline/schedules.py +++ b/pipelines/datasets/test_pipeline/schedules.py @@ -70,9 +70,11 @@ ############################################################################### -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_five_minutes = Schedule( diff --git a/pipelines/datasets/test_pipeline/tasks.py b/pipelines/datasets/test_pipeline/tasks.py index df7c44664..6b289b9a6 100644 --- a/pipelines/datasets/test_pipeline/tasks.py +++ b/pipelines/datasets/test_pipeline/tasks.py @@ -9,12 +9,11 @@ from pathlib import Path from typing import Union - import basedosdados as bd import numpy as np import pandas as pd -from prefect import task import requests +from prefect import task from pipelines.utils.utils import log diff --git a/pipelines/utils/__init__.py b/pipelines/utils/__init__.py index 04148cebb..0ec9b9aea 100644 --- a/pipelines/utils/__init__.py +++ b/pipelines/utils/__init__.py @@ -8,9 +8,9 @@ from pipelines.utils.crawler_ibge_inflacao.flows import * from pipelines.utils.dump_to_gcs.flows import * from pipelines.utils.execute_dbt_model.flows import * -from pipelines.utils.traceroute.flows import * -from pipelines.utils.temporal_coverage_updater.flows 
import * # from pipelines.utils.crawler_fgv_igp.flows import * # from pipelines.utils.apply_architecture_to_dataframe.flows import * from pipelines.utils.metadata.flows import * +from pipelines.utils.temporal_coverage_updater.flows import * +from pipelines.utils.traceroute.flows import * diff --git a/pipelines/utils/apply_architecture_to_dataframe/utils.py b/pipelines/utils/apply_architecture_to_dataframe/utils.py index 998dd58d5..7c6532ab7 100644 --- a/pipelines/utils/apply_architecture_to_dataframe/utils.py +++ b/pipelines/utils/apply_architecture_to_dataframe/utils.py @@ -5,9 +5,11 @@ from io import StringIO + import numpy as np import pandas as pd import requests + from pipelines.utils.utils import log diff --git a/pipelines/utils/crawler_ibge_inflacao/flows.py b/pipelines/utils/crawler_ibge_inflacao/flows.py index bb27deda1..402a21cd1 100644 --- a/pipelines/utils/crawler_ibge_inflacao/flows.py +++ b/pipelines/utils/crawler_ibge_inflacao/flows.py @@ -6,27 +6,27 @@ from datetime import timedelta from prefect import Parameter, case -from prefect.tasks.prefect import create_flow_run, wait_for_flow_run from prefect.run_configs import KubernetesRun from prefect.storage import GCS +from prefect.tasks.prefect import create_flow_run, wait_for_flow_run -from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants -from pipelines.utils.constants import constants as utils_constants from pipelines.constants import constants +from pipelines.utils.constants import constants as utils_constants from pipelines.utils.crawler_ibge_inflacao.tasks import ( - crawler, clean_mes_brasil, - clean_mes_rm, - clean_mes_municipio, clean_mes_geral, + clean_mes_municipio, + clean_mes_rm, + crawler, ) from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants from pipelines.utils.tasks import ( create_table_and_upload_to_gcs, - update_metadata, + get_current_flow_labels, get_temporal_coverage, rename_current_flow_run_dataset_table, - get_current_flow_labels, + update_metadata, ) with Flow( diff --git a/pipelines/utils/crawler_ibge_inflacao/utils.py b/pipelines/utils/crawler_ibge_inflacao/utils.py index 81f96a7d7..3e95f3393 100644 --- a/pipelines/utils/crawler_ibge_inflacao/utils.py +++ b/pipelines/utils/crawler_ibge_inflacao/utils.py @@ -7,9 +7,9 @@ from datetime import datetime import requests -from prefect.schedules import Schedule, filters, adjustments -from prefect.schedules.clocks import CronClock import urllib3 +from prefect.schedules import Schedule, adjustments, filters +from prefect.schedules.clocks import CronClock from pipelines.constants import constants diff --git a/pipelines/utils/custom.py b/pipelines/utils/custom.py index 36f3fed4b..26dbbe690 100644 --- a/pipelines/utils/custom.py +++ b/pipelines/utils/custom.py @@ -18,7 +18,6 @@ from prefect.storage import Storage from pipelines.constants import constants - from pipelines.utils.utils import notify_discord_on_failure diff --git a/pipelines/utils/dump_to_gcs/tasks.py b/pipelines/utils/dump_to_gcs/tasks.py index 8f1e8a3d8..acc887780 100644 --- a/pipelines/utils/dump_to_gcs/tasks.py +++ b/pipelines/utils/dump_to_gcs/tasks.py @@ -6,10 +6,10 @@ from time import sleep from typing import Union +import jinja2 from basedosdados.download.base import google_client from basedosdados.upload.base import Base from google.cloud import bigquery -import jinja2 from prefect import task from pipelines.utils.dump_to_gcs.constants import constants as dump_to_gcs_constants 
diff --git a/pipelines/utils/execute_dbt_model/flows.py b/pipelines/utils/execute_dbt_model/flows.py index 1f331d591..66ae2f99b 100644 --- a/pipelines/utils/execute_dbt_model/flows.py +++ b/pipelines/utils/execute_dbt_model/flows.py @@ -9,12 +9,8 @@ from pipelines.constants import constants from pipelines.utils.constants import constants as utils_constants -from pipelines.utils.execute_dbt_model.tasks import ( - get_k8s_dbt_client, - run_dbt_model, -) - from pipelines.utils.decorators import Flow +from pipelines.utils.execute_dbt_model.tasks import get_k8s_dbt_client, run_dbt_model from pipelines.utils.tasks import rename_current_flow_run_dataset_table with Flow(name=utils_constants.FLOW_EXECUTE_DBT_MODEL_NAME.value) as run_dbt_model_flow: diff --git a/pipelines/utils/execute_dbt_model/tasks.py b/pipelines/utils/execute_dbt_model/tasks.py index 93c738be6..5c005e88f 100644 --- a/pipelines/utils/execute_dbt_model/tasks.py +++ b/pipelines/utils/execute_dbt_model/tasks.py @@ -9,10 +9,8 @@ from dbt_client import DbtClient from prefect import task -from pipelines.utils.execute_dbt_model.utils import ( - get_dbt_client, -) from pipelines.constants import constants +from pipelines.utils.execute_dbt_model.utils import get_dbt_client @task( diff --git a/pipelines/utils/execute_dbt_model/utils.py b/pipelines/utils/execute_dbt_model/utils.py index c3c850acf..0f1f10989 100644 --- a/pipelines/utils/execute_dbt_model/utils.py +++ b/pipelines/utils/execute_dbt_model/utils.py @@ -2,7 +2,7 @@ """ General utilities for interacting with dbt-rpc """ -from datetime import timedelta, datetime +from datetime import datetime, timedelta from typing import List from dbt_client import DbtClient diff --git a/pipelines/utils/metadata/flows.py b/pipelines/utils/metadata/flows.py index e461635aa..72cf7e04e 100644 --- a/pipelines/utils/metadata/flows.py +++ b/pipelines/utils/metadata/flows.py @@ -3,19 +3,20 @@ Flows for temporal_coverage_updater """ +from prefect import Parameter from prefect.run_configs import KubernetesRun from prefect.storage import GCS + from pipelines.constants import constants + +# from pipelines.datasets.temporal_coverage_updater.schedules import every_two_weeks +from pipelines.utils.decorators import Flow from pipelines.utils.metadata.tasks import ( - update_django_metadata, get_today_date, test_ids, + update_django_metadata, ) -# from pipelines.datasets.temporal_coverage_updater.schedules import every_two_weeks -from pipelines.utils.decorators import Flow -from prefect import Parameter - # from pipelines.utils.utils import log with Flow( diff --git a/pipelines/utils/metadata/tasks.py b/pipelines/utils/metadata/tasks.py index b3593fe54..6c1969e43 100644 --- a/pipelines/utils/metadata/tasks.py +++ b/pipelines/utils/metadata/tasks.py @@ -3,20 +3,22 @@ Tasks for metadata """ -from prefect import task from datetime import datetime -from pipelines.utils.utils import log, get_credentials_from_secret + +from dateutil.relativedelta import relativedelta +from prefect import task + from pipelines.utils.metadata.utils import ( - get_ids, - parse_temporal_coverage, - get_credentials_utils, create_update, - extract_last_update, extract_last_date, + extract_last_update, + get_credentials_utils, get_first_date, get_id, + get_ids, + parse_temporal_coverage, ) -from dateutil.relativedelta import relativedelta +from pipelines.utils.utils import get_credentials_from_secret, log @task diff --git a/pipelines/utils/metadata/utils.py b/pipelines/utils/metadata/utils.py index ea004cad4..b2dae8094 100644 --- 
a/pipelines/utils/metadata/utils.py +++ b/pipelines/utils/metadata/utils.py @@ -4,6 +4,7 @@ """ import json +import re # pylint: disable=too-many-arguments from datetime import datetime @@ -13,10 +14,8 @@ import numpy as np import pandas as pd import requests -from pipelines.utils.utils import log, get_credentials_from_secret - -import re +from pipelines.utils.utils import get_credentials_from_secret, log ####################### diff --git a/pipelines/utils/tasks.py b/pipelines/utils/tasks.py index d39cc2d94..cd1300cf1 100644 --- a/pipelines/utils/tasks.py +++ b/pipelines/utils/tasks.py @@ -4,9 +4,9 @@ """ # pylint: disable=C0103, C0301, invalid-name, E1101, R0913 -from datetime import timedelta, datetime +from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Union, List +from typing import Any, List, Union import basedosdados as bd import pandas as pd @@ -18,17 +18,17 @@ from pipelines.constants import constants from pipelines.utils.utils import ( - dump_header_to_csv, - get_ids, - parse_temporal_coverage, - get_credentials_utils, create_update, - extract_last_update, + dump_header_to_csv, extract_last_date, - get_first_date, - log, + extract_last_update, get_credentials_from_secret, + get_credentials_utils, + get_first_date, + get_ids, get_token, + log, + parse_temporal_coverage, ) diff --git a/pipelines/utils/temporal_coverage_updater/flows.py b/pipelines/utils/temporal_coverage_updater/flows.py index 862503b45..a424382f0 100644 --- a/pipelines/utils/temporal_coverage_updater/flows.py +++ b/pipelines/utils/temporal_coverage_updater/flows.py @@ -3,16 +3,15 @@ Flows for temporal_coverage_updater """ +from prefect import Parameter from prefect.run_configs import KubernetesRun from prefect.storage import GCS + from pipelines.constants import constants -from pipelines.utils.tasks import ( - update_django_metadata, -) # from pipelines.datasets.temporal_coverage_updater.schedules import every_two_weeks from pipelines.utils.decorators import Flow -from prefect import Parameter +from pipelines.utils.tasks import update_django_metadata # from pipelines.utils.utils import log diff --git a/pipelines/utils/temporal_coverage_updater/tasks.py b/pipelines/utils/temporal_coverage_updater/tasks.py index 2e305f1bb..39a2b004b 100644 --- a/pipelines/utils/temporal_coverage_updater/tasks.py +++ b/pipelines/utils/temporal_coverage_updater/tasks.py @@ -4,19 +4,19 @@ """ -from prefect import task - # from basedosdados.upload.base import Base import basedosdados as bd +from prefect import task + from pipelines.utils.temporal_coverage_updater.utils import ( - find_ids, - parse_temporal_coverage, - get_credentials, create_update, extract_last_update, + find_ids, + get_credentials, get_first_date, + parse_temporal_coverage, ) -from pipelines.utils.utils import log, get_credentials_from_secret +from pipelines.utils.utils import get_credentials_from_secret, log ## TODO: Transformar flow em task OK diff --git a/pipelines/utils/temporal_coverage_updater/utils.py b/pipelines/utils/temporal_coverage_updater/utils.py index 02dcd1771..5eace0ee8 100644 --- a/pipelines/utils/temporal_coverage_updater/utils.py +++ b/pipelines/utils/temporal_coverage_updater/utils.py @@ -3,15 +3,17 @@ General purpose functions for the temporal_coverage_updater project """ import json -import requests -from datetime import datetime import re +from datetime import datetime +from typing import Tuple + import basedosdados as bd +import requests + from pipelines.utils.temporal_coverage_updater.constants import ( 
constants as temp_constants, ) -from typing import Tuple -from pipelines.utils.utils import log, get_credentials_from_secret +from pipelines.utils.utils import get_credentials_from_secret, log def get_first_date(ids, email, password): diff --git a/pipelines/utils/traceroute/tasks.py b/pipelines/utils/traceroute/tasks.py index c213dda76..6c98ebe1f 100644 --- a/pipelines/utils/traceroute/tasks.py +++ b/pipelines/utils/traceroute/tasks.py @@ -5,13 +5,9 @@ from prefect import task -from pipelines.utils.traceroute.utils import ( - get_ip_location, - traceroute, -) +from pipelines.utils.traceroute.utils import get_ip_location, traceroute from pipelines.utils.utils import log - # pylint: disable=invalid-name diff --git a/pipelines/utils/traceroute/utils.py b/pipelines/utils/traceroute/utils.py index 3ffc0b793..3f257be22 100644 --- a/pipelines/utils/traceroute/utils.py +++ b/pipelines/utils/traceroute/utils.py @@ -8,7 +8,6 @@ import requests - # pylint: disable=invalid-name diff --git a/pipelines/utils/utils.py b/pipelines/utils/utils.py index 1c1147b99..d7a2acce1 100644 --- a/pipelines/utils/utils.py +++ b/pipelines/utils/utils.py @@ -7,6 +7,7 @@ # pylint: disable=too-many-arguments import logging +import re from datetime import datetime from os import getenv, walk from os.path import join @@ -31,8 +32,6 @@ from pipelines.constants import constants -import re - def log(msg: Any, level: str = "info") -> None: """ diff --git a/pipelines/{{cookiecutter.project_name}}/flows.py b/pipelines/{{cookiecutter.project_name}}/flows.py index de97e5bba..039cd2e70 100644 --- a/pipelines/{{cookiecutter.project_name}}/flows.py +++ b/pipelines/{{cookiecutter.project_name}}/flows.py @@ -60,10 +60,12 @@ from prefect.run_configs import KubernetesRun from prefect.storage import GCS + from pipelines.constants import constants -from pipelines.{{cookiecutter.project_name}}.tasks import say_hello + # from pipelines.{{cookiecutter.project_name}}.schedules import every_two_weeks from pipelines.utils.decorators import Flow +from pipelines.{{cookiecutter.project_name}}.tasks import say_hello with Flow( name="my_flow", diff --git a/pipelines/{{cookiecutter.project_name}}/schedules.py b/pipelines/{{cookiecutter.project_name}}/schedules.py index fb6e12cc8..e31ab8120 100644 --- a/pipelines/{{cookiecutter.project_name}}/schedules.py +++ b/pipelines/{{cookiecutter.project_name}}/schedules.py @@ -70,9 +70,11 @@ ############################################################################### -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_two_weeks = Schedule( diff --git a/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/flows.py b/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/flows.py index 6ff6737fb..ab4c4a3ef 100644 --- a/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/flows.py +++ b/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/flows.py @@ -61,10 +61,14 @@ from prefect.run_configs import KubernetesRun from prefect.storage import GCS + from pipelines.constants import constants -from pipelines.{{cookiecutter.project_name}}.{{cookiecutter.workspace_name}}.tasks import say_hello + # from pipelines.{{cookiecutter.project_name}}.{{cookiecutter.workspace_name}}.schedules import every_two_weeks from pipelines.utils.decorators import Flow +from 
pipelines.{{cookiecutter.project_name}}.{{cookiecutter.workspace_name}}.tasks import ( + say_hello, +) with Flow( name="my_flow", diff --git a/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/schedules.py b/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/schedules.py index 196b6073b..1ae3efef8 100644 --- a/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/schedules.py +++ b/pipelines/{{cookiecutter.project_name}}/{{cookiecutter.workspace_name}}/schedules.py @@ -70,9 +70,11 @@ ############################################################################### -from datetime import timedelta, datetime +from datetime import datetime, timedelta + from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock + from pipelines.constants import constants every_two_weeks = Schedule( diff --git a/tests/test_igp.py b/tests/test_igp.py index 18743bf26..5acf5da71 100644 --- a/tests/test_igp.py +++ b/tests/test_igp.py @@ -9,7 +9,6 @@ from pipelines.datasets.br_fgv_igp.utils import IGPData - # pylint: disable=invalid-name, redefined-outer-name