Skip to content

Commit

Permalink
Merge pull request #624 from basedosdados/staging/br_rf_arrecadacao
Browse files Browse the repository at this point in the history
[dbt]br_rf_arrecadacao novas tabelas
  • Loading branch information
Winzen authored Nov 7, 2024
2 parents 4641ef6 + 0c952c0 commit a94818f
Show file tree
Hide file tree
Showing 13 changed files with 879 additions and 76 deletions.
36 changes: 36 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__cnae.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="cnae",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `cnae` table as br_rf_arrecadacao.cnae.
-- The table is integer-range partitioned on ano and clustered by mes.
-- Every column is converted with safe_cast, so a value that cannot be
-- converted becomes NULL instead of failing the whole model.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(secao_sigla as string) as secao_sigla,
    -- All remaining columns are amounts stored as float64.
    safe_cast(imposto_importacao as float64) as imposto_importacao,
    safe_cast(imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(ipi as float64) as ipi,
    safe_cast(irpf as float64) as irpf,
    safe_cast(irpj as float64) as irpj,
    safe_cast(irrf as float64) as irrf,
    safe_cast(iof as float64) as iof,
    safe_cast(itr as float64) as itr,
    safe_cast(cofins as float64) as cofins,
    safe_cast(pis_pasep as float64) as pis_pasep,
    safe_cast(csll as float64) as csll,
    safe_cast(cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(contribuicao_previdenciaria as float64) as contribuicao_previdenciaria,
    safe_cast(cpsss as float64) as cpsss,
    safe_cast(pagamento_unificado as float64) as pagamento_unificado,
    safe_cast(outras_receitas_rfb as float64) as outras_receitas_rfb,
    safe_cast(demais_receitas as float64) as demais_receitas
from `basedosdados-dev.br_rf_arrecadacao_staging.cnae`
26 changes: 26 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__ir_ipi.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="ir_ipi",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2019, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `ir_ipi` table as br_rf_arrecadacao.ir_ipi.
-- The table is integer-range partitioned on ano and clustered by mes.
-- safe_cast turns unconvertible values into NULL rather than raising.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(tributo as string) as tributo,
    safe_cast(decendio as string) as decendio,
    -- Amount columns, stored as float64.
    safe_cast(arrecadacao_bruta as float64) as arrecadacao_bruta,
    safe_cast(retificacao as float64) as retificacao,
    safe_cast(compensacao as float64) as compensacao,
    safe_cast(restituicao as float64) as restituicao,
    safe_cast(outros as float64) as outros,
    safe_cast(arrecadacao_liquida as float64) as arrecadacao_liquida
from `basedosdados-dev.br_rf_arrecadacao_staging.ir_ipi`
22 changes: 22 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__itr.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="itr",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2017, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `itr` table as br_rf_arrecadacao.itr.
-- The table is integer-range partitioned on ano and clustered by mes.
-- safe_cast turns unconvertible values into NULL rather than raising.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(sigla_regiao as string) as sigla_regiao,
    safe_cast(cidade as string) as cidade,
    safe_cast(valor_arrecadado as float64) as valor_arrecadado
from `basedosdados-dev.br_rf_arrecadacao_staging.itr`
47 changes: 47 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__natureza_juridica.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="natureza_juridica",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `natureza_juridica` table as
-- br_rf_arrecadacao.natureza_juridica, replacing the staging code with the
-- full id_natureza_juridica taken from the directory table.
-- The table is integer-range partitioned on ano and clustered by mes.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
with
    referencia_codigo as (
        -- One row per directory id, paired with the first three characters
        -- of that id; the prefix is the join key against the staging code.
        -- The table path must be backtick-quoted because the project id
        -- contains a hyphen.
        select
            id_natureza_juridica,
            -- substr positions are 1-based in BigQuery.
            substr(cast(id_natureza_juridica as string), 1, 3) as inicio_codigo
        from `basedosdados-staging.br_bd_diretorios_brasil.natureza_juridica`
    )
select
    safe_cast(t.ano as int64) as ano,
    safe_cast(t.mes as int64) as mes,
    safe_cast(
        referencia_codigo.id_natureza_juridica as string
    ) as natureza_juridica_codigo,
    -- All remaining columns are amounts stored as float64.
    safe_cast(t.imposto_importacao as float64) as imposto_importacao,
    safe_cast(t.imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(t.ipi as float64) as ipi,
    safe_cast(t.irpf as float64) as irpf,
    safe_cast(t.irpj as float64) as irpj,
    safe_cast(t.irrf as float64) as irrf,
    safe_cast(t.iof as float64) as iof,
    safe_cast(t.itr as float64) as itr,
    safe_cast(t.cofins as float64) as cofins,
    safe_cast(t.pis_pasep as float64) as pis_pasep,
    safe_cast(t.csll as float64) as csll,
    safe_cast(t.cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(t.contribuicao_previdenciaria as float64) as contribuicao_previdenciaria,
    safe_cast(t.cpsss as float64) as cpsss,
    safe_cast(t.pagamento_unificado as float64) as pagamento_unificado,
    safe_cast(t.outras_receitas_rfb as float64) as outras_receitas_rfb,
    safe_cast(t.demais_receitas as float64) as demais_receitas
from `basedosdados-dev.br_rf_arrecadacao_staging.natureza_juridica` as t
-- Inner join: staging rows whose code matches no directory prefix are dropped.
inner join
    referencia_codigo on t.natureza_juridica_codigo = referencia_codigo.inicio_codigo
130 changes: 65 additions & 65 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__uf.sql
Original file line number Diff line number Diff line change
@@ -1,65 +1,65 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="uf",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2000, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `uf` table as br_rf_arrecadacao.uf.
-- The table is integer-range partitioned on ano and clustered by mes.
-- safe_cast turns unconvertible values into NULL rather than raising.
-- A few staging columns are renamed on the way out; those are marked below.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(sigla_uf as string) as sigla_uf,
    -- All remaining columns are amounts stored as float64.
    safe_cast(imposto_importacao as float64) as imposto_importacao,
    safe_cast(imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(ipi_fumo as float64) as ipi_fumo,
    safe_cast(ipi_bebidas as float64) as ipi_bebidas,
    safe_cast(ipi_automoveis as float64) as ipi_automoveis,
    safe_cast(ipi_importacoes as float64) as ipi_importacoes,
    safe_cast(ipi_outros as float64) as ipi_outros,
    safe_cast(irpf as float64) as irpf,
    safe_cast(irpj_entidades_financeiras as float64) as irpj_entidades_financeiras,
    safe_cast(irpj_demais_empresas as float64) as irpj_demais_empresas,
    safe_cast(irrf_rendimentos_trabalho as float64) as irrf_rendimentos_trabalho,
    safe_cast(irrf_rendimentos_capital as float64) as irrf_rendimentos_capital,
    safe_cast(irrf_remessas_exterior as float64) as irrf_remessas_exterior,
    safe_cast(irrf_outros_rendimentos as float64) as irrf_outros_rendimentos,
    safe_cast(iof as float64) as iof,
    safe_cast(itr as float64) as itr,
    safe_cast(ipmf as float64) as ipmf,
    safe_cast(cpmf as float64) as cpmf,
    safe_cast(cofins as float64) as cofins,
    -- renamed: cofins_financeiras -> cofins_entidades_financeiras
    safe_cast(cofins_financeiras as float64) as cofins_entidades_financeiras,
    safe_cast(cofins_demais_empresas as float64) as cofins_demais_empresas,
    safe_cast(pis_pasep as float64) as pis_pasep,
    safe_cast(
        pis_pasep_entidades_financeiras as float64
    ) as pis_pasep_entidades_financeiras,
    safe_cast(pis_pasep_demais_empresas as float64) as pis_pasep_demais_empresas,
    safe_cast(csll as float64) as csll,
    -- renamed: csll_financeiras -> csll_entidades_financeiras
    safe_cast(csll_financeiras as float64) as csll_entidades_financeiras,
    safe_cast(csll_demais_empresas as float64) as csll_demais_empresas,
    safe_cast(
        cide_combustiveis_parcela_nao_dedutivel as float64
    ) as cide_combustiveis_parcela_nao_dedutivel,
    safe_cast(cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(cpsss_1 as float64) as cpsss_1,
    safe_cast(cpsss_2 as float64) as cpsss_2,
    -- renamed: contribuicoes_fundaf -> contribuicao_fundaf
    safe_cast(contribuicoes_fundaf as float64) as contribuicao_fundaf,
    safe_cast(refis as float64) as refis,
    safe_cast(paes as float64) as paes,
    safe_cast(retencoes_fonte as float64) as retencoes_fonte,
    safe_cast(pagamento_unificado as float64) as pagamento_unificado,
    -- renamed: outras_receitas_ -> outras_receitas_rfb
    safe_cast(outras_receitas_ as float64) as outras_receitas_rfb,
    safe_cast(demais_receitas as float64) as demais_receitas,
    safe_cast(receita_previdenciaria as float64) as receita_previdenciaria,
    safe_cast(receita_previdenciaria_propria as float64) as receita_previdenciaria_propria,
    safe_cast(receita_previdenciaria_demais as float64) as receita_previdenciaria_demais,
    safe_cast(receitas_outros_orgaos as float64) as receitas_outros_orgaos
from `basedosdados-dev.br_rf_arrecadacao_staging.uf`
{{
    config(
        schema="br_rf_arrecadacao",
        alias="uf",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2000, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Publishes the staging `uf` table as br_rf_arrecadacao.uf.
-- The table is integer-range partitioned on ano and clustered by mes.
-- safe_cast turns unconvertible values into NULL rather than raising.
-- A few staging columns are renamed on the way out; those are marked below.
-- NOTE(review): BigQuery treats the range end as exclusive, so rows with
-- ano = 2024 would fall outside the declared partitions; confirm intended.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(sigla_uf as string) as sigla_uf,
    -- All remaining columns are amounts stored as float64.
    safe_cast(imposto_importacao as float64) as imposto_importacao,
    safe_cast(imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(ipi_fumo as float64) as ipi_fumo,
    safe_cast(ipi_bebidas as float64) as ipi_bebidas,
    safe_cast(ipi_automoveis as float64) as ipi_automoveis,
    safe_cast(ipi_importacoes as float64) as ipi_importacoes,
    safe_cast(ipi_outros as float64) as ipi_outros,
    safe_cast(irpf as float64) as irpf,
    safe_cast(irpj_entidades_financeiras as float64) as irpj_entidades_financeiras,
    safe_cast(irpj_demais_empresas as float64) as irpj_demais_empresas,
    safe_cast(irrf_rendimentos_trabalho as float64) as irrf_rendimentos_trabalho,
    safe_cast(irrf_rendimentos_capital as float64) as irrf_rendimentos_capital,
    safe_cast(irrf_remessas_exterior as float64) as irrf_remessas_exterior,
    safe_cast(irrf_outros_rendimentos as float64) as irrf_outros_rendimentos,
    safe_cast(iof as float64) as iof,
    safe_cast(itr as float64) as itr,
    safe_cast(ipmf as float64) as ipmf,
    safe_cast(cpmf as float64) as cpmf,
    safe_cast(cofins as float64) as cofins,
    -- renamed: cofins_financeiras -> cofins_entidades_financeiras
    safe_cast(cofins_financeiras as float64) as cofins_entidades_financeiras,
    safe_cast(cofins_demais_empresas as float64) as cofins_demais_empresas,
    safe_cast(pis_pasep as float64) as pis_pasep,
    safe_cast(
        pis_pasep_entidades_financeiras as float64
    ) as pis_pasep_entidades_financeiras,
    safe_cast(pis_pasep_demais_empresas as float64) as pis_pasep_demais_empresas,
    safe_cast(csll as float64) as csll,
    -- renamed: csll_financeiras -> csll_entidades_financeiras
    safe_cast(csll_financeiras as float64) as csll_entidades_financeiras,
    safe_cast(csll_demais_empresas as float64) as csll_demais_empresas,
    safe_cast(
        cide_combustiveis_parcela_nao_dedutivel as float64
    ) as cide_combustiveis_parcela_nao_dedutivel,
    safe_cast(cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(cpsss_1 as float64) as cpsss_1,
    safe_cast(cpsss_2 as float64) as cpsss_2,
    -- renamed: contribuicoes_fundaf -> contribuicao_fundaf
    safe_cast(contribuicoes_fundaf as float64) as contribuicao_fundaf,
    safe_cast(refis as float64) as refis,
    safe_cast(paes as float64) as paes,
    safe_cast(retencoes_fonte as float64) as retencoes_fonte,
    safe_cast(pagamento_unificado as float64) as pagamento_unificado,
    -- renamed: outras_receitas_ -> outras_receitas_rfb
    safe_cast(outras_receitas_ as float64) as outras_receitas_rfb,
    safe_cast(demais_receitas as float64) as demais_receitas,
    safe_cast(receita_previdenciaria as float64) as receita_previdenciaria,
    safe_cast(receita_previdenciaria_propria as float64) as receita_previdenciaria_propria,
    safe_cast(receita_previdenciaria_demais as float64) as receita_previdenciaria_demais,
    safe_cast(receitas_outros_orgaos as float64) as receitas_outros_orgaos
from `basedosdados-dev.br_rf_arrecadacao_staging.uf`
50 changes: 50 additions & 0 deletions models/br_rf_arrecadacao/code/clean_cnae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import numpy as np
import pandas as pd
from clean_functions import *

def rename_columns(df):
    """Rename the source CSV headers to the snake_case names used downstream.

    Args:
        df: DataFrame whose columns carry the original headers.

    Returns:
        The result of ``df.rename(columns=...)``; headers that are not in the
        mapping keep their current name.
    """
    # (original header, output column name) pairs, in source order.
    header_pairs = [
        ('Ano', 'ano'),
        ('Mês', 'mes'),
        ('Seção - Sigla', 'secao_sigla'),
        ('Seção - Nome', 'secao_nome'),
        ('II', 'imposto_importacao'),
        ('IE', 'imposto_exportacao'),
        ('IPI', 'ipi'),
        ('IRPF', 'irpf'),
        ('IRPJ', 'irpj'),
        ('IRRF', 'irrf'),
        ('IOF', 'iof'),
        ('ITR', 'itr'),
        ('Cofins', 'cofins'),
        ('Pis/Pasep', 'pis_pasep'),
        ('CSLL', 'csll'),
        ('Cide', 'cide_combustiveis'),
        ('Contribuição Previdenciária', 'contribuicao_previdenciaria'),
        ('CPSSS', 'cpsss'),
        ('Pagamento Unificado', 'pagamento_unificado'),
        ('Outras Receitas Administradas', 'outras_receitas_rfb'),
        ('Receitas Não Administradas', 'demais_receitas'),
    ]
    header_to_column = dict(header_pairs)

    renamed = df.rename(columns=header_to_column)
    return renamed

def change_types(df):
    """Convert the renamed columns to their final types, modifying ``df`` in place.

    Args:
        df: DataFrame that already has the renamed columns ``ano``, ``mes``
            and ``secao_nome``.

    Returns:
        The same ``df`` object, after conversion.
    """
    df['ano'] = df['ano'].astype('int')
    # get_month_number comes from clean_functions; presumably it maps month
    # names to numbers -- verify against that module.
    df['mes'] = get_month_number(df['mes'])
    df['secao_nome'] = df['secao_nome'].str.title()

    # Everything from the fifth column onwards is treated as a monetary value.
    # NOTE(review): this relies on column position, so it assumes the first
    # four columns are the non-monetary ones -- confirm against the input file.
    monetary_columns = df.columns[4:]
    for column_name in monetary_columns:
        cleaned = df[column_name].apply(replace_commas).apply(remove_dots)
        df[column_name] = cleaned.astype('float')

    return df

if __name__ == '__main__':
    # Pipeline: read the raw CSV, drop empty columns and rows, rename the
    # headers, convert the types, then write the output partitioned by
    # ano and mes.
    raw = read_data(file_dir='../input/arrecadacao-cnae.csv')
    without_empty_columns = remove_empty_columns(raw)
    without_empty_rows = remove_empty_rows(without_empty_columns)
    renamed = rename_columns(without_empty_rows)
    typed = change_types(renamed)
    save_data(
        df=typed,
        file_dir='../output/br_rf_arrecadacao_cnae',
        partition_cols=['ano', 'mes'],
    )
Loading

0 comments on commit a94818f

Please sign in to comment.