-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #624 from basedosdados/staging/br_rf_arrecadacao
[dbt]br_rf_arrecadacao novas tabelas
- Loading branch information
Showing
13 changed files
with
879 additions
and
76 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{#
    Typed model over the staging `cnae` table.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a staging value cannot be converted.

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="cnae",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

select
    -- Period and section identifiers.
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(secao_sigla as string) secao_sigla,
    -- One float64 column per revenue item.
    safe_cast(imposto_importacao as float64) imposto_importacao,
    safe_cast(imposto_exportacao as float64) imposto_exportacao,
    safe_cast(ipi as float64) ipi,
    safe_cast(irpf as float64) irpf,
    safe_cast(irpj as float64) irpj,
    safe_cast(irrf as float64) irrf,
    safe_cast(iof as float64) iof,
    safe_cast(itr as float64) itr,
    safe_cast(cofins as float64) cofins,
    safe_cast(pis_pasep as float64) pis_pasep,
    safe_cast(csll as float64) csll,
    safe_cast(cide_combustiveis as float64) cide_combustiveis,
    safe_cast(contribuicao_previdenciaria as float64) contribuicao_previdenciaria,
    safe_cast(cpsss as float64) cpsss,
    safe_cast(pagamento_unificado as float64) pagamento_unificado,
    safe_cast(outras_receitas_rfb as float64) outras_receitas_rfb,
    safe_cast(demais_receitas as float64) demais_receitas,
from `basedosdados-dev.br_rf_arrecadacao_staging.cnae` as t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
{#
    Typed model over the staging `ir_ipi` table.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a staging value cannot be converted.

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="ir_ipi",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2019, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

select
    -- Period and classification columns.
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(tributo as string) tributo,
    safe_cast(decendio as string) decendio,
    -- Amount columns, all float64.
    safe_cast(arrecadacao_bruta as float64) arrecadacao_bruta,
    safe_cast(retificacao as float64) retificacao,
    safe_cast(compensacao as float64) compensacao,
    safe_cast(restituicao as float64) restituicao,
    safe_cast(outros as float64) outros,
    safe_cast(arrecadacao_liquida as float64) arrecadacao_liquida,
from `basedosdados-dev.br_rf_arrecadacao_staging.ir_ipi` as t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{#
    Typed model over the staging `itr` table.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a staging value cannot be converted.

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="itr",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2017, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

select
    -- Period and location columns.
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(sigla_uf as string) sigla_uf,
    safe_cast(sigla_regiao as string) sigla_regiao,
    safe_cast(cidade as string) cidade,
    -- Amount column.
    safe_cast(valor_arrecadado as float64) valor_arrecadado,
from `basedosdados-dev.br_rf_arrecadacao_staging.itr` as t
47 changes: 47 additions & 0 deletions
47
models/br_rf_arrecadacao/br_rf_arrecadacao__natureza_juridica.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{#
    Typed model over the staging `natureza_juridica` table, joined to the
    natureza_juridica directory so the output carries the directory's full
    id_natureza_juridica instead of the code stored in staging.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a value cannot be converted.

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="natureza_juridica",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

with
    -- Directory ids paired with their first three characters, which is the
    -- key the staging table is joined on.
    referencia_codigo as (
        select
            id_natureza_juridica,
            -- SUBSTR positions are 1-based; 1 selects the same three leading
            -- characters the previous position 0 resolved to.
            substr(cast(id_natureza_juridica as string), 1, 3) as inicio_codigo
        -- The project id contains a hyphen, so the path must be backtick-quoted;
        -- unquoted it had been reformatted into `basedosdados - staging`.
        from `basedosdados-staging.br_bd_diretorios_brasil.natureza_juridica`
    )
select
    safe_cast(t.ano as int64) ano,
    safe_cast(t.mes as int64) mes,
    safe_cast(
        referencia_codigo.id_natureza_juridica as string
    ) natureza_juridica_codigo,
    -- One float64 column per revenue item.
    safe_cast(t.imposto_importacao as float64) imposto_importacao,
    safe_cast(t.imposto_exportacao as float64) imposto_exportacao,
    safe_cast(t.ipi as float64) ipi,
    safe_cast(t.irpf as float64) irpf,
    safe_cast(t.irpj as float64) irpj,
    safe_cast(t.irrf as float64) irrf,
    safe_cast(t.iof as float64) iof,
    safe_cast(t.itr as float64) itr,
    safe_cast(t.cofins as float64) cofins,
    safe_cast(t.pis_pasep as float64) pis_pasep,
    safe_cast(t.csll as float64) csll,
    safe_cast(t.cide_combustiveis as float64) cide_combustiveis,
    safe_cast(t.contribuicao_previdenciaria as float64) contribuicao_previdenciaria,
    safe_cast(t.cpsss as float64) cpsss,
    safe_cast(t.pagamento_unificado as float64) pagamento_unificado,
    safe_cast(t.outras_receitas_rfb as float64) outras_receitas_rfb,
    safe_cast(t.demais_receitas as float64) demais_receitas,
from `basedosdados-dev.br_rf_arrecadacao_staging.natureza_juridica` as t
-- NOTE(review): an inner join drops staging rows whose code matches no
-- directory prefix, and repeats a staging row once per directory id sharing
-- the same three-character prefix. Confirm both are acceptable for this data.
inner join
    referencia_codigo on t.natureza_juridica_codigo = referencia_codigo.inicio_codigo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,65 +1,65 @@ | ||
{#
    Typed model over the staging `uf` table.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a staging value cannot be converted.

    Four staging columns are renamed on output:
      cofins_financeiras   -> cofins_entidades_financeiras
      csll_financeiras     -> csll_entidades_financeiras
      contribuicoes_fundaf -> contribuicao_fundaf
      outras_receitas_     -> outras_receitas_rfb

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="uf",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2000, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

select
    -- Period and state identifiers.
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(sigla_uf as string) sigla_uf,
    -- One float64 column per revenue item.
    safe_cast(imposto_importacao as float64) imposto_importacao,
    safe_cast(imposto_exportacao as float64) imposto_exportacao,
    safe_cast(ipi_fumo as float64) ipi_fumo,
    safe_cast(ipi_bebidas as float64) ipi_bebidas,
    safe_cast(ipi_automoveis as float64) ipi_automoveis,
    safe_cast(ipi_importacoes as float64) ipi_importacoes,
    safe_cast(ipi_outros as float64) ipi_outros,
    safe_cast(irpf as float64) irpf,
    safe_cast(irpj_entidades_financeiras as float64) irpj_entidades_financeiras,
    safe_cast(irpj_demais_empresas as float64) irpj_demais_empresas,
    safe_cast(irrf_rendimentos_trabalho as float64) irrf_rendimentos_trabalho,
    safe_cast(irrf_rendimentos_capital as float64) irrf_rendimentos_capital,
    safe_cast(irrf_remessas_exterior as float64) irrf_remessas_exterior,
    safe_cast(irrf_outros_rendimentos as float64) irrf_outros_rendimentos,
    safe_cast(iof as float64) iof,
    safe_cast(itr as float64) itr,
    safe_cast(ipmf as float64) ipmf,
    safe_cast(cpmf as float64) cpmf,
    safe_cast(cofins as float64) cofins,
    safe_cast(cofins_financeiras as float64) cofins_entidades_financeiras,
    safe_cast(cofins_demais_empresas as float64) cofins_demais_empresas,
    safe_cast(pis_pasep as float64) pis_pasep,
    safe_cast(
        pis_pasep_entidades_financeiras as float64
    ) pis_pasep_entidades_financeiras,
    safe_cast(pis_pasep_demais_empresas as float64) pis_pasep_demais_empresas,
    safe_cast(csll as float64) csll,
    safe_cast(csll_financeiras as float64) csll_entidades_financeiras,
    safe_cast(csll_demais_empresas as float64) csll_demais_empresas,
    safe_cast(
        cide_combustiveis_parcela_nao_dedutivel as float64
    ) cide_combustiveis_parcela_nao_dedutivel,
    safe_cast(cide_combustiveis as float64) cide_combustiveis,
    safe_cast(cpsss_1 as float64) cpsss_1,
    safe_cast(cpsss_2 as float64) cpsss_2,
    safe_cast(contribuicoes_fundaf as float64) contribuicao_fundaf,
    safe_cast(refis as float64) refis,
    safe_cast(paes as float64) paes,
    safe_cast(retencoes_fonte as float64) retencoes_fonte,
    safe_cast(pagamento_unificado as float64) pagamento_unificado,
    -- NOTE(review): the staging name ends in an underscore as written here;
    -- confirm it matches the staging schema.
    safe_cast(outras_receitas_ as float64) outras_receitas_rfb,
    safe_cast(demais_receitas as float64) demais_receitas,
    safe_cast(receita_previdenciaria as float64) receita_previdenciaria,
    safe_cast(receita_previdenciaria_propria as float64) receita_previdenciaria_propria,
    safe_cast(receita_previdenciaria_demais as float64) receita_previdenciaria_demais,
    safe_cast(receitas_outros_orgaos as float64) receitas_outros_orgaos,
from `basedosdados-dev.br_rf_arrecadacao_staging.uf` as t
{#
    Typed model over the staging `uf` table.

    Every column is converted with safe_cast, which yields NULL instead of
    failing the query when a staging value cannot be converted.

    Four staging columns are renamed on output:
      cofins_financeiras   -> cofins_entidades_financeiras
      csll_financeiras     -> csll_entidades_financeiras
      contribuicoes_fundaf -> contribuicao_fundaf
      outras_receitas_     -> outras_receitas_rfb

    Partitioning: BigQuery integer-range partitions treat `end` as exclusive.
    The bound is 2025 so that rows with ano = 2024 get their own partition
    instead of falling into __UNPARTITIONED__. Raise it again when later
    years are loaded.
#}
{{
    config(
        schema="br_rf_arrecadacao",
        alias="uf",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2000, "end": 2025, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

select
    -- Period and state identifiers.
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(sigla_uf as string) sigla_uf,
    -- One float64 column per revenue item.
    safe_cast(imposto_importacao as float64) imposto_importacao,
    safe_cast(imposto_exportacao as float64) imposto_exportacao,
    safe_cast(ipi_fumo as float64) ipi_fumo,
    safe_cast(ipi_bebidas as float64) ipi_bebidas,
    safe_cast(ipi_automoveis as float64) ipi_automoveis,
    safe_cast(ipi_importacoes as float64) ipi_importacoes,
    safe_cast(ipi_outros as float64) ipi_outros,
    safe_cast(irpf as float64) irpf,
    safe_cast(irpj_entidades_financeiras as float64) irpj_entidades_financeiras,
    safe_cast(irpj_demais_empresas as float64) irpj_demais_empresas,
    safe_cast(irrf_rendimentos_trabalho as float64) irrf_rendimentos_trabalho,
    safe_cast(irrf_rendimentos_capital as float64) irrf_rendimentos_capital,
    safe_cast(irrf_remessas_exterior as float64) irrf_remessas_exterior,
    safe_cast(irrf_outros_rendimentos as float64) irrf_outros_rendimentos,
    safe_cast(iof as float64) iof,
    safe_cast(itr as float64) itr,
    safe_cast(ipmf as float64) ipmf,
    safe_cast(cpmf as float64) cpmf,
    safe_cast(cofins as float64) cofins,
    safe_cast(cofins_financeiras as float64) cofins_entidades_financeiras,
    safe_cast(cofins_demais_empresas as float64) cofins_demais_empresas,
    safe_cast(pis_pasep as float64) pis_pasep,
    safe_cast(
        pis_pasep_entidades_financeiras as float64
    ) pis_pasep_entidades_financeiras,
    safe_cast(pis_pasep_demais_empresas as float64) pis_pasep_demais_empresas,
    safe_cast(csll as float64) csll,
    safe_cast(csll_financeiras as float64) csll_entidades_financeiras,
    safe_cast(csll_demais_empresas as float64) csll_demais_empresas,
    safe_cast(
        cide_combustiveis_parcela_nao_dedutivel as float64
    ) cide_combustiveis_parcela_nao_dedutivel,
    safe_cast(cide_combustiveis as float64) cide_combustiveis,
    safe_cast(cpsss_1 as float64) cpsss_1,
    safe_cast(cpsss_2 as float64) cpsss_2,
    safe_cast(contribuicoes_fundaf as float64) contribuicao_fundaf,
    safe_cast(refis as float64) refis,
    safe_cast(paes as float64) paes,
    safe_cast(retencoes_fonte as float64) retencoes_fonte,
    safe_cast(pagamento_unificado as float64) pagamento_unificado,
    -- NOTE(review): the staging name ends in an underscore as written here;
    -- confirm it matches the staging schema.
    safe_cast(outras_receitas_ as float64) outras_receitas_rfb,
    safe_cast(demais_receitas as float64) demais_receitas,
    safe_cast(receita_previdenciaria as float64) receita_previdenciaria,
    safe_cast(receita_previdenciaria_propria as float64) receita_previdenciaria_propria,
    safe_cast(receita_previdenciaria_demais as float64) receita_previdenciaria_demais,
    safe_cast(receitas_outros_orgaos as float64) receitas_outros_orgaos,
from `basedosdados-dev.br_rf_arrecadacao_staging.uf` as t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
from clean_functions import * | ||
|
||
def rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with the source CSV headers renamed to snake_case names.

    Columns whose header is not listed in the mapping keep their name, and
    mapping entries with no matching column are ignored (the default
    behaviour of ``DataFrame.rename``). The input frame is not modified.

    Args:
        df: Frame whose columns still carry the original CSV headers.

    Returns:
        A new frame with the renamed columns.
    """
    name_dict = {
        'Ano':'ano',
        'Mês':'mes',
        'Seção - Sigla':'secao_sigla',
        'Seção - Nome':'secao_nome',
        'II':'imposto_importacao',
        'IE':'imposto_exportacao',
        'IPI':'ipi',
        'IRPF':'irpf',
        'IRPJ':'irpj',
        'IRRF':'irrf',
        'IOF':'iof',
        'ITR':'itr',
        'Cofins':'cofins',
        'Pis/Pasep':'pis_pasep',
        'CSLL':'csll',
        'Cide': 'cide_combustiveis',
        'Contribuição Previdenciária':'contribuicao_previdenciaria',
        'CPSSS':'cpsss',
        'Pagamento Unificado':'pagamento_unificado',
        'Outras Receitas Administradas':'outras_receitas_rfb',
        'Receitas Não Administradas':'demais_receitas'
    }

    return df.rename(columns=name_dict)
|
||
def change_types(df: pd.DataFrame) -> pd.DataFrame:
    """Convert the renamed columns to their final types.

    ``ano`` becomes an integer, ``mes`` is replaced by the result of
    ``get_month_number``, ``secao_nome`` is title-cased, and every other
    column except ``secao_sigla`` is cleaned with ``replace_commas`` and
    ``remove_dots`` and then cast to float.

    The frame is modified in place and also returned.

    Args:
        df: Frame whose columns were already renamed by ``rename_columns``.

    Returns:
        The same frame object, with converted columns.
    """
    df['ano'] = df['ano'].astype('int')
    # get_month_number comes from clean_functions; presumably it maps the
    # month label to its number -- confirm against that module.
    df['mes'] = get_month_number(df['mes'])
    df['secao_nome'] = df['secao_nome'].str.title()

    # All remaining columns are monetary values. They are selected by name
    # rather than by position so the result does not depend on the order in
    # which the identifier columns appear in the input file.
    id_columns = {'ano', 'mes', 'secao_sigla', 'secao_nome'}
    monetary_columns = [col for col in df.columns if col not in id_columns]

    for col in monetary_columns:
        # NOTE(review): the helpers are opaque from here. If replace_commas
        # turns ',' into '.' and remove_dots strips every '.', this order
        # would also strip the decimal separator -- verify in clean_functions.
        df[col] = df[col].apply(replace_commas).apply(remove_dots).astype('float')

    return df
|
||
if __name__ == '__main__':
    # Paths are relative to the current working directory, so the script is
    # expected to be run from the directory that contains it.
    df = read_data(file_dir='../input/arrecadacao-cnae.csv')

    # Drop empty columns and rows before renaming, then fix the types.
    df = remove_empty_columns(df)
    df = remove_empty_rows(df)
    df = rename_columns(df)
    df = change_types(df)

    # Output is partitioned by year and month.
    save_data(df=df,file_dir='../output/br_rf_arrecadacao_cnae',partition_cols=['ano','mes'])
Oops, something went wrong.