Skip to content

Commit

Permalink
Merge branch 'main' into update-br-inep-sinopse-educacao-basica
Browse files Browse the repository at this point in the history
  • Loading branch information
aspeddro authored Mar 26, 2024
2 parents cd21dbd + e7ff52f commit 8477e35
Show file tree
Hide file tree
Showing 13 changed files with 1,096 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .user.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
id: cc3f54e0-fd01-4495-bd12-aa41f3b24444
6 changes: 6 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,18 @@ models:
+post-hook:
- REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"
br_mme_consumo_energia_eletrica:
+materialized: table
+schema: br_mme_consumo_energia_eletrica
br_mp_pep:
+materialized: table
+schema: br_mp_pep
br_ms_cnes:
+materialized: table
+schema: br_ms_cnes
br_ms_sia:
+materialized: table
+schema: br_ms_sia
br_ms_sim:
+materialized: table
+schema: br_ms_sim
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
{{ config(alias="orgao_deputado", schema="br_camara_dados_abertos") }}
select distinct
regexp_extract(uriorgao, r'/orgaos/(\d+)') as id_orgao,
safe_cast(nomeorgao as string) nome,
safe_cast(siglaorgao as string) sigla,
safe_cast(nomedeputado as string) nome_deputado,
safe_cast(cargo as string) cargo,
safe_cast(siglauf as string) sigla_uf,
safe_cast(datainicio as date) data_inicio,
safe_cast(datafim as date) data_final,
safe_cast(siglapartido as string) sigla_partido,
from `basedosdados-staging.br_camara_dados_abertos_staging.orgao_deputado` as t
-- Typed view of the staging rows that link deputies to Câmara bodies (órgãos),
-- minus one specific record that is filtered out at the end.
with
    orgao_deputado as (
        -- `distinct` collapses rows that are identical in every selected column
        select distinct
            -- digits that follow `/orgaos/` in the body's URI
            regexp_extract(uriorgao, r'/orgaos/(\d+)') as id_orgao,
            safe_cast(nomeorgao as string) as nome,
            safe_cast(siglaorgao as string) as sigla,
            safe_cast(nomedeputado as string) as nome_deputado,
            safe_cast(cargo as string) as cargo,
            safe_cast(siglauf as string) as sigla_uf,
            safe_cast(datainicio as date) as data_inicio,
            safe_cast(datafim as date) as data_final,
            safe_cast(siglapartido as string) as sigla_partido
        from `basedosdados-staging.br_camara_dados_abertos_staging.orgao_deputado`
    )
select *
from orgao_deputado
-- Exclude exactly one record (the reason is not visible in this file).
-- `is not true` is used instead of `not (...)` so that rows for which the
-- predicate evaluates to NULL (for example a NULL `data_final`) are kept;
-- with `not (...)` those rows would be silently dropped as well.
where
    (
        nome_deputado = 'Hélio Leite'
        and cargo = 'Titular'
        and sigla_uf is null
        and data_inicio = date '2022-05-03'
        and data_final = date '2023-02-01'
    ) is not true
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{{ config(alias="proposicao_autor", schema="br_camara_dados_abertos") }}

select
select distinct
safe_cast(idproposicao as string) id_proposicao,
replace(safe_cast(iddeputadoautor as string), ".0", "") id_deputado,
initcap(safe_cast(tipoautor as string)) tipo_autor,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,88 @@
partition_by={
"field": "ano",
"data_type": "INT64",
"range": {"start": 1935, "end": 2023, "interval": 1},
"range": {"start": 1935, "end": 2024, "interval": 1},
},
)
}}

select
safe_cast(ano as int64) ano,
safe_cast(
split(
format_timestamp('%Y-%m-%dT%H:%M:%E*S', timestamp(dataapresentacao)), 'T'
)[offset(0)] as date
) data,
safe_cast(
split(
format_timestamp('%Y-%m-%dT%H:%M:%E*S', timestamp(dataapresentacao)), 'T'
)[offset(1)] as time
) horario,
safe_cast(id as string) id_proposicao,
safe_cast(uri as string) url,
safe_cast(numero as string) numero,
safe_cast(siglatipo as string) sigla,
safe_cast(descricaotipo as string) tipo,
safe_cast(ementa as string) ementa,
safe_cast(ementadetalhada as string) ementa_detalhada,
safe_cast(keywords as string) palavra_chave,
safe_cast(uriorgaonumerador as string) url_orgao_numerador,
safe_cast(uripropprincipal as string) url_principal,
safe_cast(uripropposterior as string) url_posterior,
safe_cast(urlinteiroteor as string) url_teor_proposicao,
safe_cast(ultimostatus_datahora as string) data_hora_ultimo_status,
safe_cast(ultimostatus_urirelator as string) url_relator_ultimo_status,
safe_cast(ultimostatus_siglaorgao as string) sigla_orgao_ultimo_status,
safe_cast(ultimostatus_regime as string) regime_ultimo_status,
safe_cast(ultimostatus_descricaotramitacao as string) tramitacao_ultimo_status,
safe_cast(ultimostatus_descricaosituacao as string) situacao_ultimo_status,
safe_cast(ultimostatus_despacho as string) despacho_ultimo_status,
safe_cast(ultimostatus_apreciacao as string) apreciacao_ultimo_status,
safe_cast(ultimostatus_sequencia as string) sequencia_ultimo_status,
safe_cast(ultimostatus_url as string) url_ultimo_status,
from `basedosdados-staging.br_camara_dados_abertos_staging.proposicao_microdados` as t
-- Typed view of the proposition microdata staging table.
-- Steps: (1) cast every raw column, (2) blank out presentation dates that are
-- not in the past, (3) de-duplicate and drop one specific proposition.
with
    proposicao_cast as (
        select
            safe_cast(ano as int64) as ano,
            -- `dataapresentacao` is rendered as 'YYYY-MM-DDTHH:MM:SS' and split
            -- on 'T': the first part becomes the date, the second the time.
            safe_cast(
                split(
                    format_timestamp(
                        '%Y-%m-%dT%H:%M:%E*S', timestamp(dataapresentacao)
                    ),
                    'T'
                )[offset(0)] as date
            ) as data,
            safe_cast(
                split(
                    format_timestamp(
                        '%Y-%m-%dT%H:%M:%E*S', timestamp(dataapresentacao)
                    ),
                    'T'
                )[offset(1)] as time
            ) as horario,
            safe_cast(id as string) as id_proposicao,
            safe_cast(uri as string) as url,
            safe_cast(numero as string) as numero,
            safe_cast(siglatipo as string) as sigla,
            safe_cast(descricaotipo as string) as tipo,
            safe_cast(ementa as string) as ementa,
            safe_cast(ementadetalhada as string) as ementa_detalhada,
            safe_cast(keywords as string) as palavra_chave,
            safe_cast(uriorgaonumerador as string) as url_orgao_numerador,
            safe_cast(uripropprincipal as string) as url_principal,
            safe_cast(uripropposterior as string) as url_posterior,
            safe_cast(urlinteiroteor as string) as url_teor_proposicao,
            safe_cast(ultimostatus_datahora as string) as data_hora_ultimo_status,
            safe_cast(ultimostatus_urirelator as string) as url_relator_ultimo_status,
            safe_cast(ultimostatus_siglaorgao as string) as sigla_orgao_ultimo_status,
            safe_cast(ultimostatus_regime as string) as regime_ultimo_status,
            safe_cast(
                ultimostatus_descricaotramitacao as string
            ) as tramitacao_ultimo_status,
            safe_cast(
                ultimostatus_descricaosituacao as string
            ) as situacao_ultimo_status,
            safe_cast(ultimostatus_despacho as string) as despacho_ultimo_status,
            safe_cast(ultimostatus_apreciacao as string) as apreciacao_ultimo_status,
            safe_cast(ultimostatus_sequencia as string) as sequencia_ultimo_status,
            safe_cast(ultimostatus_url as string) as url_ultimo_status
        from
            `basedosdados-staging.br_camara_dados_abertos_staging.proposicao_microdados`
    ),
    proposicao_sem_data_futura as (
        select
            ano,
            -- a presentation date of today or later is replaced by NULL
            if(data >= current_date(), null, data) as data,
            horario,
            id_proposicao,
            url,
            numero,
            sigla,
            tipo,
            ementa,
            ementa_detalhada,
            palavra_chave,
            url_orgao_numerador,
            url_principal,
            url_posterior,
            url_teor_proposicao,
            data_hora_ultimo_status,
            url_relator_ultimo_status,
            sigla_orgao_ultimo_status,
            regime_ultimo_status,
            tramitacao_ultimo_status,
            situacao_ultimo_status,
            despacho_ultimo_status,
            apreciacao_ultimo_status,
            sequencia_ultimo_status,
            url_ultimo_status
        from proposicao_cast
    )
select distinct *
from proposicao_sem_data_futura
-- Exclude proposition 510035 of 2011 (the reason is not visible in this file).
-- `is not true` keeps rows where `ano` or `id_proposicao` is NULL; a plain
-- `not (...)` evaluates to NULL for such rows and would drop them too.
where (ano = 2011 and id_proposicao = '510035') is not true
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,17 @@
)
}}

select
safe_cast(replace(ano, ".0", "") as int64) ano,
regexp_extract(uriproposicao, r'/proposicoes/(\d+)') as id_proposicao,
safe_cast(siglatipo as string) tipo_proposicao,
safe_cast(numero as string) numero,
safe_cast(codtema as string) tema,
safe_cast(relevancia as int64) relevancia,
from `basedosdados-staging.br_camara_dados_abertos_staging.proposicao_tema` as t
-- Typed view of the proposition/theme staging table, minus one proposition.
with
    proposicao_tema as (
        select
            -- the raw year may carry a trailing ".0"; strip it before casting
            safe_cast(replace(ano, ".0", "") as int64) as ano,
            -- digits that follow `/proposicoes/` in the proposition URI
            regexp_extract(uriproposicao, r'/proposicoes/(\d+)') as id_proposicao,
            safe_cast(siglatipo as string) as tipo_proposicao,
            safe_cast(numero as string) as numero,
            safe_cast(tema as string) as tema,
            safe_cast(relevancia as int64) as relevancia
        from `basedosdados-staging.br_camara_dados_abertos_staging.proposicao_tema`
    )
select *
from proposicao_tema
-- Exclude proposition 510035 of 2011 (the reason is not visible in this file).
-- `is not true` keeps rows where `ano` or `id_proposicao` is NULL; a plain
-- `not (...)` evaluates to NULL for such rows and would drop them too.
where (ano = 2011 and id_proposicao = '510035') is not true
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,77 @@ with
safe_cast(forma_de_declaracao_da_idade as string) forma_declaracao_idade,
safe_cast(sexo as string) sexo,
safe_cast(idade as string) idade,
-- Numeric age in years parsed from the `idade` label:
--   'Menos de 1 mês'      -> 0
--   'N mês' / 'N meses'   -> N / 12 (fraction of a year)
--   'N ano' / 'N anos'    -> N
-- Any other label yields NULL.
-- Each regexp_extract pattern wraps the whole digit run in the capture group:
-- without a group the function returns the full match ("1 mês"), which cannot
-- be cast to INT64, and `([0-9])+` would capture only the last digit of
-- "10 meses" / "11 meses".
case
    when idade = 'Menos de 1 mês'
    then 0
    when regexp_contains(idade, r'[0-9]+ (?:mês|meses)')
    then safe_cast(regexp_extract(idade, r'([0-9]+) (?:mês|meses)') as int64) / 12
    when regexp_contains(idade, r'[0-9]+ anos?')
    then cast(regexp_extract(idade, r'([0-9]+) anos?') as int64)
end as idade_num,
safe_cast(populacao_residente_pessoas_ as int64) populacao_residente,
from
`basedosdados-staging.br_ibge_censo_2022_staging.populacao_residente_municipio` t
)
select t2.cod as id_municipio, ibge.* except (municipio, nome_municipio, sigla_uf)
-- Final projection: attach the municipality id from the auxiliary lookup and
-- derive a five-year age band (`grupo_idade`) from the numeric age.
select
    t2.cod as id_municipio,
    ibge.* except (
        municipio, nome_municipio, sigla_uf, idade_num, populacao_residente
    ),
    idade_num as idade_anos,
    -- Bands are matched explicitly, including the open-ended top band, so a
    -- NULL `idade_num` yields a NULL band instead of being reported as
    -- '100 anos ou mais' by a catch-all `else`.
    case
        when idade_num between 0 and 4
        then '0 a 4 anos'
        when idade_num between 5 and 9
        then '5 a 9 anos'
        when idade_num between 10 and 14
        then '10 a 14 anos'
        when idade_num between 15 and 19
        then '15 a 19 anos'
        when idade_num between 20 and 24
        then '20 a 24 anos'
        when idade_num between 25 and 29
        then '25 a 29 anos'
        when idade_num between 30 and 34
        then '30 a 34 anos'
        when idade_num between 35 and 39
        then '35 a 39 anos'
        when idade_num between 40 and 44
        then '40 a 44 anos'
        when idade_num between 45 and 49
        then '45 a 49 anos'
        when idade_num between 50 and 54
        then '50 a 54 anos'
        when idade_num between 55 and 59
        then '55 a 59 anos'
        when idade_num between 60 and 64
        then '60 a 64 anos'
        when idade_num between 65 and 69
        then '65 a 69 anos'
        when idade_num between 70 and 74
        then '70 a 74 anos'
        when idade_num between 75 and 79
        then '75 a 79 anos'
        when idade_num between 80 and 84
        then '80 a 84 anos'
        when idade_num between 85 and 89
        then '85 a 89 anos'
        when idade_num between 90 and 94
        then '90 a 94 anos'
        when idade_num between 95 and 99
        then '95 a 99 anos'
        when idade_num >= 100
        then '100 anos ou mais'
    end as grupo_idade,
    populacao_residente
from ibge
-- NOTE(review): this lookup is read from the `basedosdados-dev` project while
-- the other tables on this page come from `basedosdados-staging`; confirm
-- that is intended.
left join
    `basedosdados-dev.br_ibge_censo_2022_staging.auxiliary_table` t2
    on ibge.municipio = t2.municipio
-- Keep only single-age rows: drop labels that are ranges ('X a Y ...'), the
-- open-ended '100 anos ou mais' label and 'Menos de 1 ano'.
where
    not (
        idade like '% a %'
        or idade like '100 anos ou mais'
        or idade like 'Menos de 1 ano'
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{{
    config(
        alias="uf",
        schema="br_mme_consumo_energia_eletrica",
        materialized="table",
    )
}}
-- Electricity consumption by state (UF), month and consumption type,
-- with every staging column cast to its final type.
select
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(tipo_consumo as string) as tipo_consumo,
    -- the literal string '0' is stored as NULL; everything else is cast as-is
    safe_cast(nullif(numero_consumidores, '0') as int64) as numero_consumidores,
    -- NOTE(review): safe_cast returns NULL for values that are not valid
    -- integers; confirm `consumo` never carries decimals in staging.
    safe_cast(consumo as int64) as consumo
from `basedosdados-staging.br_mme_consumo_energia_eletrica_staging.uf`
Loading

0 comments on commit 8477e35

Please sign in to comment.