Skip to content

Commit

Permalink
Merge branch 'master' into staging/including-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurfg authored Sep 27, 2023
2 parents a41181a + f33d5f5 commit 2d37128
Show file tree
Hide file tree
Showing 173 changed files with 1,048 additions and 1,007 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/scripts/code_tree_analysis.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
import ast
from pathlib import Path
import sys
from pathlib import Path
from typing import List, Tuple, Union

import networkx as nx
from prefect import Flow
import yaml
from prefect import Flow

message_id = 0

Expand Down
25 changes: 6 additions & 19 deletions .github/workflows/scripts/register_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,28 @@
"""

import ast
from collections import (
Counter,
defaultdict,
)
import glob
import hashlib
import json
import os
from pathlib import Path
import runpy
import sys
from time import sleep
import traceback
from typing import (
Dict,
List,
Tuple,
Union,
)
from collections import Counter, defaultdict
from pathlib import Path
from time import sleep
from typing import Dict, List, Tuple, Union

import box
from loguru import logger
import prefect
from loguru import logger
from prefect.run_configs import UniversalRun
from prefect.storage import Local
from prefect.utilities.graphql import (
compress,
EnumValue,
with_args,
)
from prefect.utilities.graphql import EnumValue, compress, with_args
from typer import Typer

import pipelines # DO NOT REMOVE THIS LINE


app = Typer()
FlowLike = Union[box.Box, "prefect.Flow"]

Expand Down
1 change: 0 additions & 1 deletion .github/workflows/scripts/replace_docker_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from sys import argv, exit
from typing import List


FILE_PATH = Path("./pipelines/constants.py")
REPLACE_TAG = "AUTO_REPLACE_DOCKER_TAG"
REPLACE_IMAGE = "AUTO_REPLACE_DOCKER_IMAGE"
Expand Down
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ repos:
- id: fix-encoding-pragma # fixes encoding pragma
- id: no-commit-to-branch # prevents committing to protected branches
- id: trailing-whitespace # prevents trailing whitespace
- repo: https://github.com/python-poetry/poetry
rev: 1.6.0
hooks:
- id: poetry-check
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
args: [--profile, black, --skip, "pipelines/{{cookiecutter.project_name}}"]
- repo: https://github.com/psf/black
rev: 23.9.1
hooks:
Expand All @@ -17,7 +26,9 @@ repos:
rev: v2.2.1
hooks:
- id: autoflake
exclude: 'pipelines\/\{\{cookiecutter\.project_name\}\}.*'
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
hooks:
- id: flake8
exclude: 'pipelines\/\{\{cookiecutter\.project_name\}\}.*'
2 changes: 1 addition & 1 deletion manage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import pkgutil
from os import path
from pathlib import Path
import pkgutil
from sys import exit
from uuid import uuid4

Expand Down
60 changes: 30 additions & 30 deletions pipelines/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,44 +6,44 @@
# Automatically managed, please do not touch
###############################################################################

from pipelines.datasets.botdosdados.flows import *
from pipelines.datasets.br_anatel_banda_larga_fixa.flows import *
from pipelines.datasets.br_anatel_telefonia_movel.flows import *
from pipelines.datasets.br_anp_precos_combustiveis.flows import *
from pipelines.datasets.br_ans_beneficiario.flows import *
from pipelines.datasets.br_b3_cotacoes.flows import *
from pipelines.datasets.br_bcb_agencia.flows import *
from pipelines.datasets.br_bcb_estban.flows import *
from pipelines.datasets.br_bcb_taxa_cambio.flows import *
from pipelines.datasets.br_bcb_taxa_selic.flows import *
from pipelines.datasets.br_bd_indicadores.flows import *
from pipelines.datasets.br_bd_metadados.flows import *
from pipelines.datasets.br_cgu_pessoal_executivo_federal.flows import *
from pipelines.datasets.br_cvm_administradores_carteira.flows import *
from pipelines.datasets.br_cvm_fi.flows import *
from pipelines.datasets.br_cvm_oferta_publica_distribuicao.flows import *
from pipelines.datasets.br_me_comex_stat.flows import *
from pipelines.datasets.br_fgv_igp.flows import *
from pipelines.datasets.br_ibge_inpc.flows import *
from pipelines.datasets.br_ibge_ipca.flows import *
from pipelines.datasets.br_ibge_ipca15.flows import *
from pipelines.datasets.br_sp_saopaulo_dieese_icv.flows import *
from pipelines.datasets.br_bd_indicadores.flows import *
from pipelines.datasets.br_bd_metadados.flows import *
from pipelines.datasets.br_poder360_pesquisas.flows import *
from pipelines.datasets.br_ibge_ipca.flows import *
from pipelines.datasets.br_ibge_pnadc.flows import *
from pipelines.datasets.br_inmet_bdmep.flows import *
from pipelines.datasets.botdosdados.flows import *
from pipelines.datasets.br_cgu_pessoal_executivo_federal.flows import *
from pipelines.datasets.fundacao_lemann.flows import *
from pipelines.datasets.br_tse_eleicoes.flows import *
from pipelines.datasets.delete_flows.flows import *
from pipelines.datasets.br_jota.flows import *
from pipelines.datasets.br_fgv_igp.flows import *
from pipelines.datasets.br_me_caged.flows import *
from pipelines.datasets.br_ibge_pnadc.flows import *
from pipelines.datasets.cross_update.flows import *
from pipelines.datasets.br_bcb_estban.flows import *
from pipelines.datasets.br_me_cnpj.flows import *
from pipelines.datasets.br_me_comex_stat.flows import *
from pipelines.datasets.br_mercadolivre_ofertas.flows import *
from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.flows import *
from pipelines.datasets.br_mp_pep_cargos_funcoes.flows import *
from pipelines.datasets.br_ms_cnes.flows import *
from pipelines.datasets.br_rj_isp_estatisticas_seguranca.flows import *
from pipelines.datasets.br_anatel_banda_larga_fixa.flows import *
from pipelines.datasets.br_bcb_agencia.flows import *
from pipelines.datasets.br_cvm_fi.flows import *
from pipelines.datasets.br_ons_avaliacao_operacao.flows import *
from pipelines.datasets.br_ons_estimativa_custos.flows import *
from pipelines.datasets.br_b3_cotacoes.flows import *
from pipelines.datasets.br_anatel_telefonia_movel.flows import *
from pipelines.datasets.br_mercadolivre_ofertas.flows import *
from pipelines.datasets.br_bcb_taxa_cambio.flows import *
from pipelines.datasets.br_bcb_taxa_selic.flows import *
from pipelines.datasets.mundo_transfermarkt_competicoes.flows import *
from pipelines.datasets.br_me_cnpj.flows import *
from pipelines.datasets.br_mp_pep_cargos_funcoes.flows import *
from pipelines.datasets.br_ans_beneficiario.flows import *
from pipelines.datasets.br_anp_precos_combustiveis.flows import *
from pipelines.datasets.br_mg_belohorizonte_smfa_iptu.flows import *
from pipelines.datasets.br_poder360_pesquisas.flows import *
from pipelines.datasets.br_rf_cafir.flows import *
from pipelines.datasets.br_rj_isp_estatisticas_seguranca.flows import *
from pipelines.datasets.br_sp_saopaulo_dieese_icv.flows import *
from pipelines.datasets.br_tse_eleicoes.flows import *
from pipelines.datasets.cross_update.flows import *
from pipelines.datasets.delete_flows.flows import *
from pipelines.datasets.fundacao_lemann.flows import *
from pipelines.datasets.mundo_transfermarkt_competicoes.flows import *
13 changes: 7 additions & 6 deletions pipelines/datasets/botdosdados/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,23 @@
"""
Flows for botdosdados
"""
from prefect import case, Parameter
from prefect import Parameter, case
from prefect.run_configs import KubernetesRun
from prefect.storage import GCS

from pipelines.constants import constants
from pipelines.datasets.botdosdados.schedules import every_day
from pipelines.datasets.botdosdados.tasks import (
was_table_updated,
echo,
get_credentials,
message_last_tables,
message_inflation_plot,
message_last_tables,
send_media,
send_thread,
echo,
was_table_updated,
)
from pipelines.datasets.botdosdados.schedules import every_day
from pipelines.utils.decorators import Flow
from pipelines.utils.tasks import rename_current_flow_run, get_date_time_str
from pipelines.utils.tasks import get_date_time_str, rename_current_flow_run

with Flow(
name="botdosdados.message_inflation", code_owners=["lucas_cr"]
Expand Down
3 changes: 1 addition & 2 deletions pipelines/datasets/botdosdados/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@

from datetime import datetime

from prefect.schedules import Schedule, filters, adjustments
from prefect.schedules import Schedule, adjustments, filters
from prefect.schedules.clocks import CronClock

from pipelines.constants import constants


every_day = Schedule(
clocks=[
CronClock(
Expand Down
19 changes: 10 additions & 9 deletions pipelines/datasets/botdosdados/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,21 @@
Tasks for botdosdados
"""
import os
from typing import Tuple
from datetime import timedelta, datetime
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Tuple

import tweepy
from tweepy.auth import OAuthHandler
from prefect import task
from basedosdados.download.metadata import _safe_fetch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pipelines.utils.utils import log, get_storage_blobs, get_credentials_from_secret
import tweepy
from basedosdados.download.metadata import _safe_fetch
from prefect import task
from tweepy.auth import OAuthHandler

from pipelines.constants import constants
from pipelines.utils.utils import get_credentials_from_secret, get_storage_blobs, log


# pylint: disable=C0103
Expand Down
23 changes: 15 additions & 8 deletions pipelines/datasets/br_anatel_banda_larga_fixa/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,38 @@
"""

from datetime import timedelta

from prefect import Parameter, case
from prefect.run_configs import KubernetesRun
from prefect.storage import GCS
from prefect.tasks.prefect import create_flow_run, wait_for_flow_run

from pipelines.constants import constants

from pipelines.utils.metadata.tasks import update_django_metadata
from pipelines.utils.constants import constants as utils_constants
from pipelines.utils.decorators import Flow
from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants
from pipelines.datasets.br_anatel_banda_larga_fixa.tasks import (
treatment,
treatment_br,
treatment_uf,
treatment_municipio,
get_today_date_atualizado,
)

from pipelines.datasets.br_anatel_banda_larga_fixa.schedules import (
every_month_anatel_microdados,
)

from pipelines.datasets.br_anatel_banda_larga_fixa.tasks import (
get_today_date_atualizado,
treatment,
treatment_br,
treatment_municipio,
treatment_uf,
)
from pipelines.utils.constants import constants as utils_constants
from pipelines.utils.decorators import Flow
from pipelines.utils.execute_dbt_model.constants import constants as dump_db_constants
from pipelines.utils.tasks import (
create_table_and_upload_to_gcs,
rename_current_flow_run_dataset_table,
get_current_flow_labels,
rename_current_flow_run_dataset_table,
update_django_metadata,
)

with Flow(
Expand Down
5 changes: 3 additions & 2 deletions pipelines/datasets/br_anatel_banda_larga_fixa/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
"""

from datetime import datetime

from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
from prefect.schedules.clocks import IntervalClock
from prefect.schedules.clocks import CronClock, IntervalClock

from pipelines.constants import constants

every_month_anatel_microdados = Schedule(
Expand Down
15 changes: 6 additions & 9 deletions pipelines/datasets/br_anatel_banda_larga_fixa/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,24 @@
"""
Tasks for br_anatel_banda_larga_fixa
"""
import pandas as pd
import numpy as np
import os

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from prefect import task

from pipelines.constants import constants
from pipelines.datasets.br_anatel_banda_larga_fixa.constants import (
constants as anatel_constants,
)
from pipelines.utils.utils import (
to_partitions,
log,
)
from pipelines.datasets.br_anatel_banda_larga_fixa.utils import (
check_and_create_column,
download_and_unzip,
to_partitions_microdados,
)
from pipelines.constants import constants
from pipelines.utils.utils import log, to_partitions


@task(
Expand Down
9 changes: 5 additions & 4 deletions pipelines/datasets/br_anatel_banda_larga_fixa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
"""
General purpose functions for the br_anatel_banda_larga_fixa
"""
import os
from io import BytesIO
from zipfile import ZipFile
from pathlib import Path
from urllib.request import urlopen
import os
import pandas as pd
from zipfile import ZipFile

import numpy as np
from pathlib import Path
import pandas as pd


def download_and_unzip(url, path):
Expand Down
Loading

0 comments on commit 2d37128

Please sign in to comment.