diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000..ccfdd206
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,63 @@
+
+# Template Pull Requests - Queries-basedosdados
+
+## Nomeação do Pull Request
+
+A nomeação de cada Pull Request (PR) deve seguir o seguinte padrão:
+
+- O título de cada Pull Request (PR) deve começar com uma das seguintes palavras-chave, entre colchetes. Além disso, **marque a palavra-chave que melhor descreve o seu PR atual**:
+ - [ ] **[Dbt]**: Para subida de novos dados em produção.
+ - [ ] **[Feature]**: Para novas funcionalidades.
+ - [ ] **[Table-Approve]**: Para rodar apenas o Table-Approve.
+ - [ ] **[Bugfix]**: Para correções de bugs.
+ - [ ] **[Refactor]**: Para mudanças no código que não alteram a funcionalidade.
+ - [ ] **[Docs]**: Para atualizações na documentação.
+ - [ ] **[Test]**: Para mudanças relacionadas a testes.
+ - [ ] **[Chore]**: Para tarefas menores e de manutenção.
+
+---
+ - Exemplos de título:
+ - **[Docs] br_me_caged**
+ - **[Feature] br_cgu_servidores_publicos**
+
+## Draft:
+- Ao abrir o PR, coloque-o como draft.
+
+
+## Descrição do PR:
+- Explique de maneira clara e concisa o objetivo deste PR. O que foi alterado? Qual o problema que ele resolve?
+ - **Motivação/Contexto:**
+
+
+## Detalhes Técnicos:
+- Detalhe as mudanças mais técnicas, como ajustes no dbt, scripts ou modelo de dados utilizado.
+ - **Principais alterações no dbt/scripts:**
+ - **Mudanças nos dados e no schema:**
+ - **Impacto no desempenho:**
+
+- Se alguma parte do código precisar de alguma atenção a mais, comente na linha sinalizando para os revisores.
+
+## Testes e Validações:
+
+- Relate os testes e validações relacionados aos dados/scripts:
+ - [ ] Testado em `queries-basedosdados-dev`
+ - [ ] **Decisões relacionadas aos testes:**
+
+ - **Caso haja algo relacionado aos testes que vale a pena informar:**
+
+## Riscos e Mitigações:
+- Identifique os riscos potenciais desta mudança e como mitigá-los.
+
+ - Riscos conhecidos:
+ - Planos de rollback:
+
+## Dependências:
+- Liste quaisquer dependências externas, como bibliotecas, outros PRs ou mudanças que precisam ser feitas antes deste merge.
+ - [ ] Dependências:
+ - [ ] Nenhuma dependência adicional
+
+
+## Revisores:
+- Quando o PR estiver pronto para ser revisado, retire o **Draft** clicando em **Ready for review**, marque os revisores do repositório, envie o PR no nosso [Discord](https://discord.gg/V3yTWRYWZZ) na aba **Correções de PRs, arquiteturas e afins** e marque a **@equipe_dados**:
+ - Revisores recomendados no GitHub:
+ - basedosdados/dados
diff --git a/dbt_project.yml b/dbt_project.yml
index a9d1f3bf..97933eb0 100644
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -1,400 +1,409 @@
----
-# Name your project! Project names should contain only lowercase characters
-# and underscores. A good package name should reflect your organization's
-# name or the intended use of these models
-name: basedosdados
-version: 1.0.0
-config-version: 2
-
-# This setting configures which "profile" dbt uses for this project.
-profile: default
-# Options to disable elementary models
-vars:
- disable_run_results: false
- disable_tests_results: false
- disable_dbt_artifacts_autoupload: true
- disable_dbt_invocation_autoupload: true
-# These configurations specify where dbt should look for different types of files.
-# The `source-paths` config, for example, states that models in this project can be
-# found in the "models/" directory. You probably won't need to change these!
-model-paths: [models]
-analysis-paths: [analysis]
-test-paths: [tests]
-seed-paths: [data]
-macro-paths: [macros]
-snapshot-paths: [snapshots]
-target-path: target # directory which will store compiled SQL files
-clean-targets: # directories to be removed by `dbt clean`
- - target
- - dbt_modules
-# Grant acess
-# bq data control: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language
-# dbt grant statements use https://discourse.getdbt.com/t/the-exact-grant-statements-we-use-in-a-dbt-project/430
-# Configuring models
-# Full documentation: https://docs.getdbt.com/docs/configuring-models
-# In this example config, we tell dbt to build all models in the example/ directory
-# as tables. These settings can be overridden in the individual model files
-# using the `{{ config(...) }}` macro.
-models:
- +post-hook: GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "specialGroup:allUsers"
- +persist_docs:
- relation: true
- columns: true
- basedosdados:
- # Config indicated by + and applies to all files under models/example/
- br_anatel_banda_larga_fixa:
- +materialized: table
- +schema: br_anatel_banda_larga_fixa
- br_anatel_telefonia_movel:
- +materialized: table # Materialization type (table, view or incremental)
- +schema: br_anatel_telefonia_movel
- br_anp_precos_combustiveis:
- +materialized: table
- +schema: br_anp_precos_combustiveis
- br_ans_beneficiario:
- +materialized: incremental
- +schema: br_ans_beneficiario
- br_b3_cotacoes:
- +materialized: table
- +schema: br_b3_cotacoes
- br_bcb_agencia:
- +materialized: table
- +schema: br_bcb_agencia
- br_bcb_estban:
- +materialized: table
- +schema: br_bcb_estban
- br_bcb_sicor:
- +materialized: table
- +schema: br_bcb_sicor
- br_bcb_taxa_cambio:
- +materialized: table
- +schema: br_bcb_taxa_cambio
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_bcb_taxa_selic:
- +materialized: table
- +schema: br_bcb_taxa_selic
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_bd_diretorios_brasil:
- +materialized: table
- +schema: br_bd_diretorios_brasil
- br_bd_diretorios_mundo:
- +materialized: table
- +schema: br_bd_diretorios_mundo
- br_bd_indicadores:
- +materialized: table
- +schema: br_bd_indicadores
- br_bd_metadados:
- +materialized: table
- +schema: br_bd_metadados
- br_bd_siga_o_dinheiro:
- +materialized: table
- +schema: br_bd_siga_o_dinheiro
- br_camara_dados_abertos:
- +materialized: table
- +schema: br_camara_dados_abertos
- br_ce_fortaleza_sefin_iptu:
- +materialized: table
- +schema: br_ce_fortaleza_sefin_iptu
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_cgu_beneficios_cidadao:
- +materialized: table
- +schema: br_cgu_beneficios_cidadao
- br_cgu_cartao_pagamento:
- +materialized: table
- +schema: br_cgu_cartao_pagamento
- br_cgu_dados_abertos:
- +materialized: table
- +schema: br_cgu_dados_abertos
- br_cgu_emendas_parlamentares:
- +materialized: table
- +schema: br_cgu_emendas_parlamentares
- br_cgu_pessoal_executivo_federal:
- +materialized: table
- +schema: br_cgu_pessoal_executivo_federal
- br_cgu_servidores_executivo_federal:
- +materialized: table
- +schema: br_cgu_servidores_executivo_federal
- br_cnj_improbidade_administrativa:
- +materialized: table
- +schema: br_cnj_improbidade_administrativa
- br_cnpq_bolsas:
- +materialized: table
- +schema: br_cnpq_bolsas
- br_cvm_administradores_carteira:
- +materialized: table
- +schema: br_cvm_administradores_carteira
- br_cvm_fi:
- +materialized: table
- +schema: br_cvm_fi
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_cvm_oferta_publica_distribuicao:
- +materialized: table
- +schema: br_cvm_oferta_publica_distribuicao
- br_datahackers_state_data:
- +materialized: table
- +schema: br_datahackers_state_data
- br_denatran_frota:
- +materialized: table
- +schema: br_denatran_frota
- br_fbsp_absp:
- +materialized: table
- +schema: br_fbsp_absp
- br_fgv_igp:
- +materialized: table
- +schema: br_fgv_igp
- br_geobr_mapas:
- +materialized: table
- +schema: br_geobr_mapas
- br_ibge_censo_2022:
- +materialized: table
- +schema: br_ibge_censo_2022
- br_ibge_censo_demografico:
- +materialized: table
- +schema: br_ibge_censo_demografico
- br_ibge_estadic:
- +materialized: table
- +schema: br_ibge_estadic
- br_ibge_inpc:
- +materialized: table
- +schema: br_ibge_inpc
- br_ibge_ipca:
- +materialized: table
- +schema: br_ibge_ipca
- br_ibge_ipca15:
- +materialized: table
- +schema: br_ibge_ipca15
- br_ibge_pam:
- +materialized: table
- +schema: br_ibge_pam
- br_ibge_pevs:
- +materialized: table
- +schema: br_ibge_pevs
- br_ibge_pib:
- +materialized: table
- +schema: br_ibge_pib
- br_ibge_pnad:
- +materialized: table
- +schema: br_ibge_pnad
- br_ibge_pnad_covid:
- +materialized: table
- +schema: br_ibge_pnad_covid
- br_ibge_pnadc:
- +materialized: table
- +schema: br_ibge_pnadc
- br_ibge_pof:
- +materialized: table
- +schema: br_ibge_pof
- br_ibge_ppm:
- +materialized: table
- +schema: br_ibge_ppm
- br_inep_ana:
- +materialized: table
- +schema: br_inep_ana
- br_inep_censo_educacao_superior:
- +materialized: table
- +schema: br_inep_censo_educacao_superior
- br_inep_censo_escolar:
- +materialized: table
- +schema: br_inep_censo_escolar
- br_inep_enem:
- +materialized: table
- +schema: br_inep_enem
- br_inep_formacao_docente:
- +materialized: table
- +schema: br_inep_formacao_docente
- br_inep_ideb:
- +materialized: table
- +schema: br_inep_ideb
- br_inep_indicador_nivel_socioeconomico:
- +materialized: table
- +schema: br_inep_indicador_nivel_socioeconomico
- br_inep_indicadores_educacionais:
- +materialized: table
- +schema: br_inep_indicadores_educacionais
- br_inep_saeb:
- +materialized: table
- +schema: br_inep_saeb
- br_inep_sinopse_estatistica_educacao_basica:
- +materialized: table
- +schema: br_inep_sinopse_estatistica_educacao_basica
- br_inmet_bdmep:
- +materialized: table
- +schema: br_inmet_bdmep
- br_inpe_prodes:
- +materialized: table
- +schema: br_inpe_prodes
- br_inpe_queimadas:
- +materialized: table
- +schema: br_inpe_queimadas
- br_inpe_sisam:
- +materialized: table
- +schema: br_inpe_sisam
- br_ipea_avs:
- +materialized: table
- +schema: br_ipea_avs
- br_jota:
- +materialized: table
- +schema: br_jota
- br_mdr_snis:
- +materialized: table
- +schema: br_mdr_snis
- br_me_caged:
- +materialized: table
- +schema: br_me_caged
- br_me_cno:
- +materialized: table
- +schema: br_me_cno
- br_me_cnpj:
- +materialized: table
- +schema: br_me_cnpj
- br_me_comex_stat:
- +materialized: table
- +schema: br_me_comex_stat
- br_me_rais:
- +materialized: table
- +schema: br_me_rais
- br_me_sic:
- +materialized: table
- +schema: br_me_sic
- br_me_siconfi:
- +materialized: table
- +schema: br_me_siconfi
- br_mec_prouni:
- +materialized: table
- +schema: br_mec_prouni
- br_mec_sisu:
- +materialized: table
- +schema: br_mec_sisu
- br_mercadolivre_ofertas:
- +materialized: table
- +schema: br_mercadolivre_ofertas
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_mg_belohorizonte_smfa_iptu:
- +materialized: table
- +schema: br_mg_belohorizonte_smfa_iptu
- br_mme_consumo_energia_eletrica:
- +materialized: table
- +schema: br_mme_consumo_energia_eletrica
- br_mp_pep:
- +materialized: table
- +schema: br_mp_pep
- br_ms_cnes:
- +materialized: table
- +schema: br_ms_cnes
- br_ms_pns:
- +materialized: table
- +schema: br_ms_pns
- br_ms_sia:
- +materialized: table
- +schema: br_ms_sia
- br_ms_sih:
- +materialized: table
- +schema: br_ms_sih
- br_ms_sim:
- +materialized: table
- +schema: br_ms_sim
- br_ms_sinan:
- +materialized: table
- +schema: br_ms_sinan
- br_ms_sinasc:
- +materialized: table
- +schema: br_ms_sinasc
- br_ms_sisvan:
- +materialized: table
- +schema: br_ms_sisvan
- br_ms_vacinacao_covid19:
- +materialized: table
- +schema: br_ms_vacinacao_covid19
- br_ons_avaliacao_operacao:
- +materialized: table
- +schema: br_ons_avaliacao_operacao
- br_ons_estimativa_custos:
- +materialized: table
- +schema: br_ons_estimativa_custos
- br_poder360_pesquisas:
- +materialized: table
- +schema: br_poder360_pesquisas
- br_rf_arrecadacao:
- +materialized: table
- +schema: br_rf_arrecadacao
- br_rf_cafir:
- +materialized: table
- +schema: br_rf_cafir
- br_rf_cno:
- +materialized: table
- +schema: br_rf_cno
- br_rj_isp_estatisticas_seguranca:
- +materialized: table
- +schema: br_rj_isp_estatisticas_seguranca
- br_sp_saopaulo_dieese_icv:
- +materialized: table
- +schema: br_sp_saopaulo_dieese_icv
- br_sp_saopaulo_geosampa_iptu:
- +materialized: table
- +schema: br_sp_saopaulo_geosampa_iptu
- +post-hook:
- - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
- br_stf_corte_aberta:
- +materialized: table
- +schema: br_stf_corte_aberta
- br_trase_supply_chain:
- +materialized: table
- +schema: br_trase_supply_chain
- br_tse_eleicoes:
- +materialized: table
- +schema: br_tse_eleicoes
- br_tse_eleicoes_2022:
- +materialized: table
- +schema: br_tse_eleicoes_2022
- example:
- +materialized: table # Materialization type (table, table or incremental)
- +schema: example # Overrides the default schema (defaults to what is set on profiles.yml)
- fundacao_lemann:
- +materialized: table
- +schema: fundacao_lemann
- mundo_bm_wdi:
- +materialized: table
- +schema: mundo_bm_wdi
- mundo_transfermarkt_competicoes:
- +materialized: table
- +schema: mundo_transfermarkt_competicoes
- mundo_transfermarkt_competicoes_internacionais:
- +materialized: table
- +schema: mundo_transfermarkt_competicoes_internacionais
- test_dataset:
- +materialized: table
- +schema: test_dataset
- world_ampas_oscar:
- +materialized: table
- +schema: world_ampas_oscar
- world_iea_pirls:
- +materialized: table
- +schema: world_iea_pirls
- world_oceanos_mapeamento:
- +materialized: table
- +schema: world_oceanos_mapeamento
- world_oecd_public_finance:
- +materialized: table
- +schema: world_oecd_public_finance
- world_olympedia_olympics:
- +materialized: table
- +schema: world_olympedia_olympics
- world_wb_mides:
- +materialized: table
- +schema: world_wb_mides
- world_wwf_hydrosheds:
- +materialized: table
- +schema: world_wwf_hydrosheds
- elementary:
- +schema: elementary
+---
+# Name your project! Project names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: basedosdados
+version: 1.0.0
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project.
+profile: default
+# Options to disable elementary models
+vars:
+ disable_run_results: false
+ disable_tests_results: false
+ disable_dbt_artifacts_autoupload: true
+ disable_dbt_invocation_autoupload: true
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that models in this project can be
+# found in the "models/" directory. You probably won't need to change these!
+model-paths: [models]
+analysis-paths: [analysis]
+test-paths: [tests]
+seed-paths: [data]
+macro-paths: [macros]
+snapshot-paths: [snapshots]
+target-path: target # directory which will store compiled SQL files
+clean-targets: # directories to be removed by `dbt clean`
+ - target
+ - dbt_modules
+# Grant access
+# bq data control: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language
+# dbt grant statements use https://discourse.getdbt.com/t/the-exact-grant-statements-we-use-in-a-dbt-project/430
+# Configuring models
+# Full documentation: https://docs.getdbt.com/docs/configuring-models
+# In this example config, we tell dbt to build all models in the example/ directory
+# as tables. These settings can be overridden in the individual model files
+# using the `{{ config(...) }}` macro.
+models:
+ +post-hook: GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "specialGroup:allUsers"
+ +persist_docs:
+ relation: true
+ columns: true
+ basedosdados:
+ # Config indicated by + and applies to all files under models/example/
+ br_anatel_banda_larga_fixa:
+ +materialized: table
+ +schema: br_anatel_banda_larga_fixa
+ br_anatel_telefonia_movel:
+ +materialized: table # Materialization type (table, view or incremental)
+ +schema: br_anatel_telefonia_movel
+ br_anp_precos_combustiveis:
+ +materialized: table
+ +schema: br_anp_precos_combustiveis
+ br_ans_beneficiario:
+ +materialized: incremental
+ +schema: br_ans_beneficiario
+ br_b3_cotacoes:
+ +materialized: table
+ +schema: br_b3_cotacoes
+ br_bcb_agencia:
+ +materialized: table
+ +schema: br_bcb_agencia
+ br_bcb_estban:
+ +materialized: table
+ +schema: br_bcb_estban
+ br_bcb_sicor:
+ +materialized: table
+ +schema: br_bcb_sicor
+ br_bcb_taxa_cambio:
+ +materialized: table
+ +schema: br_bcb_taxa_cambio
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_bcb_taxa_selic:
+ +materialized: table
+ +schema: br_bcb_taxa_selic
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_bd_diretorios_brasil:
+ +materialized: table
+ +schema: br_bd_diretorios_brasil
+ br_bd_diretorios_mundo:
+ +materialized: table
+ +schema: br_bd_diretorios_mundo
+ br_bd_indicadores:
+ +materialized: table
+ +schema: br_bd_indicadores
+ br_bd_metadados:
+ +materialized: table
+ +schema: br_bd_metadados
+ br_bd_siga_o_dinheiro:
+ +materialized: table
+ +schema: br_bd_siga_o_dinheiro
+ br_camara_dados_abertos:
+ +materialized: table
+ +schema: br_camara_dados_abertos
+ br_ce_fortaleza_sefin_iptu:
+ +materialized: table
+ +schema: br_ce_fortaleza_sefin_iptu
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_cgu_beneficios_cidadao:
+ +materialized: table
+ +schema: br_cgu_beneficios_cidadao
+ br_cgu_cartao_pagamento:
+ +materialized: table
+ +schema: br_cgu_cartao_pagamento
+ br_cgu_dados_abertos:
+ +materialized: table
+ +schema: br_cgu_dados_abertos
+ br_cgu_emendas_parlamentares:
+ +materialized: table
+ +schema: br_cgu_emendas_parlamentares
+ br_cgu_pessoal_executivo_federal:
+ +materialized: table
+ +schema: br_cgu_pessoal_executivo_federal
+ br_cgu_servidores_executivo_federal:
+ +materialized: table
+ +schema: br_cgu_servidores_executivo_federal
+ br_cnj_improbidade_administrativa:
+ +materialized: table
+ +schema: br_cnj_improbidade_administrativa
+ br_cnpq_bolsas:
+ +materialized: table
+ +schema: br_cnpq_bolsas
+ br_cvm_administradores_carteira:
+ +materialized: table
+ +schema: br_cvm_administradores_carteira
+ br_cvm_fi:
+ +materialized: table
+ +schema: br_cvm_fi
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_cvm_oferta_publica_distribuicao:
+ +materialized: table
+ +schema: br_cvm_oferta_publica_distribuicao
+ br_datahackers_state_data:
+ +materialized: table
+ +schema: br_datahackers_state_data
+ br_denatran_frota:
+ +materialized: table
+ +schema: br_denatran_frota
+ br_fbsp_absp:
+ +materialized: table
+ +schema: br_fbsp_absp
+ br_fgv_igp:
+ +materialized: table
+ +schema: br_fgv_igp
+ br_geobr_mapas:
+ +materialized: table
+ +schema: br_geobr_mapas
+ br_ibge_censo_2022:
+ +materialized: table
+ +schema: br_ibge_censo_2022
+ br_ibge_censo_demografico:
+ +materialized: table
+ +schema: br_ibge_censo_demografico
+ br_ibge_estadic:
+ +materialized: table
+ +schema: br_ibge_estadic
+ br_ibge_inpc:
+ +materialized: table
+ +schema: br_ibge_inpc
+ br_ibge_ipca:
+ +materialized: table
+ +schema: br_ibge_ipca
+ br_ibge_ipca15:
+ +materialized: table
+ +schema: br_ibge_ipca15
+ br_ibge_pam:
+ +materialized: table
+ +schema: br_ibge_pam
+ br_ibge_pevs:
+ +materialized: table
+ +schema: br_ibge_pevs
+ br_ibge_pib:
+ +materialized: table
+ +schema: br_ibge_pib
+ br_ibge_pnad:
+ +materialized: table
+ +schema: br_ibge_pnad
+ br_ibge_pnad_covid:
+ +materialized: table
+ +schema: br_ibge_pnad_covid
+ br_ibge_pnadc:
+ +materialized: table
+ +schema: br_ibge_pnadc
+ br_ibge_pof:
+ +materialized: table
+ +schema: br_ibge_pof
+ br_ibge_ppm:
+ +materialized: table
+ +schema: br_ibge_ppm
+ br_inep_ana:
+ +materialized: table
+ +schema: br_inep_ana
+ br_inep_censo_educacao_superior:
+ +materialized: table
+ +schema: br_inep_censo_educacao_superior
+ br_inep_censo_escolar:
+ +materialized: table
+ +schema: br_inep_censo_escolar
+ br_inep_educacao_especial:
+ +materialized: table
+ +schema: br_inep_educacao_especial
+ br_inep_enem:
+ +materialized: table
+ +schema: br_inep_enem
+ br_inep_formacao_docente:
+ +materialized: table
+ +schema: br_inep_formacao_docente
+ br_inep_ideb:
+ +materialized: table
+ +schema: br_inep_ideb
+ br_inep_indicador_nivel_socioeconomico:
+ +materialized: table
+ +schema: br_inep_indicador_nivel_socioeconomico
+ br_inep_indicadores_educacionais:
+ +materialized: table
+ +schema: br_inep_indicadores_educacionais
+ br_inep_saeb:
+ +materialized: table
+ +schema: br_inep_saeb
+ br_inep_sinopse_estatistica_educacao_basica:
+ +materialized: table
+ +schema: br_inep_sinopse_estatistica_educacao_basica
+ br_inmet_bdmep:
+ +materialized: table
+ +schema: br_inmet_bdmep
+ br_inpe_prodes:
+ +materialized: table
+ +schema: br_inpe_prodes
+ br_inpe_queimadas:
+ +materialized: table
+ +schema: br_inpe_queimadas
+ br_inpe_sisam:
+ +materialized: table
+ +schema: br_inpe_sisam
+ br_ipea_avs:
+ +materialized: table
+ +schema: br_ipea_avs
+ br_jota:
+ +materialized: table
+ +schema: br_jota
+ br_mdr_snis:
+ +materialized: table
+ +schema: br_mdr_snis
+ br_me_caged:
+ +materialized: table
+ +schema: br_me_caged
+ br_me_cno:
+ +materialized: table
+ +schema: br_me_cno
+ br_me_cnpj:
+ +materialized: table
+ +schema: br_me_cnpj
+ br_me_comex_stat:
+ +materialized: table
+ +schema: br_me_comex_stat
+ br_me_rais:
+ +materialized: table
+ +schema: br_me_rais
+ br_me_sic:
+ +materialized: table
+ +schema: br_me_sic
+ br_me_siconfi:
+ +materialized: table
+ +schema: br_me_siconfi
+ br_mec_prouni:
+ +materialized: table
+ +schema: br_mec_prouni
+ br_mec_sisu:
+ +materialized: table
+ +schema: br_mec_sisu
+ br_mercadolivre_ofertas:
+ +materialized: table
+ +schema: br_mercadolivre_ofertas
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_mg_belohorizonte_smfa_iptu:
+ +materialized: table
+ +schema: br_mg_belohorizonte_smfa_iptu
+ br_mme_consumo_energia_eletrica:
+ +materialized: table
+ +schema: br_mme_consumo_energia_eletrica
+ br_mp_pep:
+ +materialized: table
+ +schema: br_mp_pep
+ br_ms_cnes:
+ +materialized: table
+ +schema: br_ms_cnes
+ br_ms_pns:
+ +materialized: table
+ +schema: br_ms_pns
+ br_ms_sia:
+ +materialized: table
+ +schema: br_ms_sia
+ br_ms_sih:
+ +materialized: table
+ +schema: br_ms_sih
+ br_ms_sim:
+ +materialized: table
+ +schema: br_ms_sim
+ br_ms_sinan:
+ +materialized: table
+ +schema: br_ms_sinan
+ br_ms_sinasc:
+ +materialized: table
+ +schema: br_ms_sinasc
+ br_ms_sisvan:
+ +materialized: table
+ +schema: br_ms_sisvan
+ br_ms_vacinacao_covid19:
+ +materialized: table
+ +schema: br_ms_vacinacao_covid19
+ br_ons_avaliacao_operacao:
+ +materialized: table
+ +schema: br_ons_avaliacao_operacao
+ br_ons_estimativa_custos:
+ +materialized: table
+ +schema: br_ons_estimativa_custos
+ br_poder360_pesquisas:
+ +materialized: table
+ +schema: br_poder360_pesquisas
+ br_rf_arrecadacao:
+ +materialized: table
+ +schema: br_rf_arrecadacao
+ br_rf_cafir:
+ +materialized: table
+ +schema: br_rf_cafir
+ br_rf_cno:
+ +materialized: table
+ +schema: br_rf_cno
+ br_rj_isp_estatisticas_seguranca:
+ +materialized: table
+ +schema: br_rj_isp_estatisticas_seguranca
+ br_sfb_sicar:
+ +materialized: table
+ +schema: br_sfb_sicar
+ br_sp_saopaulo_dieese_icv:
+ +materialized: table
+ +schema: br_sp_saopaulo_dieese_icv
+ br_sp_saopaulo_geosampa_iptu:
+ +materialized: table
+ +schema: br_sp_saopaulo_geosampa_iptu
+ +post-hook:
+ - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
+ - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org"
+ br_stf_corte_aberta:
+ +materialized: table
+ +schema: br_stf_corte_aberta
+ br_trase_supply_chain:
+ +materialized: table
+ +schema: br_trase_supply_chain
+ br_tse_eleicoes:
+ +materialized: table
+ +schema: br_tse_eleicoes
+ br_tse_eleicoes_2022:
+ +materialized: table
+ +schema: br_tse_eleicoes_2022
+ br_tse_filiacao_partidaria:
+ +materialized: table
+ +schema: br_tse_filiacao_partidaria
+ example:
+ +materialized: table # Materialization type (table, table or incremental)
+ +schema: example # Overrides the default schema (defaults to what is set on profiles.yml)
+ fundacao_lemann:
+ +materialized: table
+ +schema: fundacao_lemann
+ mundo_bm_wdi:
+ +materialized: table
+ +schema: mundo_bm_wdi
+ mundo_transfermarkt_competicoes:
+ +materialized: table
+ +schema: mundo_transfermarkt_competicoes
+ mundo_transfermarkt_competicoes_internacionais:
+ +materialized: table
+ +schema: mundo_transfermarkt_competicoes_internacionais
+ test_dataset:
+ +materialized: table
+ +schema: test_dataset
+ world_ampas_oscar:
+ +materialized: table
+ +schema: world_ampas_oscar
+ world_iea_pirls:
+ +materialized: table
+ +schema: world_iea_pirls
+ world_oceanos_mapeamento:
+ +materialized: table
+ +schema: world_oceanos_mapeamento
+ world_oecd_public_finance:
+ +materialized: table
+ +schema: world_oecd_public_finance
+ world_olympedia_olympics:
+ +materialized: table
+ +schema: world_olympedia_olympics
+ world_wb_mides:
+ +materialized: table
+ +schema: world_wb_mides
+ world_wwf_hydrosheds:
+ +materialized: table
+ +schema: world_wwf_hydrosheds
+ elementary:
+ +schema: elementary
diff --git a/macros/validate_date_range.sql b/macros/validate_date_range.sql
new file mode 100644
index 00000000..79a441ad
--- /dev/null
+++ b/macros/validate_date_range.sql
@@ -0,0 +1,25 @@
+{% macro validate_date_range(column_name, start_date, end_date=None) %}
+    {#
+        Renders a CASE expression that returns column_name cast to DATE when
+        it lies within [start_date, end_date], and NULL otherwise. When
+        end_date is omitted, the upper bound is current_date(). The bounds are
+        interpolated into date('...') literals. The comparisons use safe_cast,
+        like the result, so a value that cannot be cast yields NULL throughout.
+    #}
+    {% if end_date is none %}
+        {% set upper_bound = "current_date()" %}
+    {% else %}
+        {% set upper_bound = "date('" ~ end_date ~ "')" %}
+    {% endif %}
+
+    case
+        when {{ column_name }} is null
+        then null
+        when
+            safe_cast({{ column_name }} as date) >= date('{{ start_date }}')
+            and safe_cast({{ column_name }} as date) <= {{ upper_bound }}
+        then safe_cast({{ column_name }} as date)
+        else null
+    end
+
+{% endmacro %}
diff --git a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql
index e62c7827..77215158 100644
--- a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql
+++ b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql
@@ -5,14 +5,13 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 2009, "end": 2022, "interval": 1},
+ "range": {"start": 2009, "end": 2024, "interval": 1},
},
cluster_by="sigla_uf",
)
}}
select
-
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
diff --git a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql
index 6513fc9a..558f5ecb 100644
--- a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql
+++ b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql
@@ -5,11 +5,12 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 2009, "end": 2022, "interval": 1},
+ "range": {"start": 2009, "end": 2024, "interval": 1},
},
cluster_by="sigla_uf",
)
}}
+
select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
diff --git a/models/br_inep_censo_educacao_superior/schema.yml b/models/br_inep_censo_educacao_superior/schema.yml
index 374496c4..88432749 100644
--- a/models/br_inep_censo_educacao_superior/schema.yml
+++ b/models/br_inep_censo_educacao_superior/schema.yml
@@ -49,18 +49,10 @@ models:
description: Rede de Ensino
- name: id_ies
description: Código da Instituição
- tests:
- - relationships:
- to: ref('br_bd_diretorios_brasil__instituicao_ensino_superior')
- field: id_ies
- name: nome_curso
description: Nome do Curso
- name: id_curso
description: Código do Curso
- tests:
- - relationships:
- to: ref('br_bd_diretorios_brasil__curso_superior')
- field: id_curso
- name: nome_curso_cine
description: Nome do curso, conforme adaptação da Classificação Internacional
Normalizada da Educação Cine/Unesco
@@ -600,10 +592,6 @@ models:
description: Código único de identificação da mantenedora da IES
- name: id_ies
description: Código único de identificação da IES
- tests:
- - relationships:
- to: ref('br_bd_diretorios_brasil__instituicao_ensino_superior')
- field: id_ies
- name: nome
description: Nome da IES
- name: sigla
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql
new file mode 100644
index 00000000..b7547236
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql
@@ -0,0 +1,15 @@
+{{
+    config(
+        materialized="table",
+        schema="br_inep_educacao_especial",
+        alias="brasil_distorcao_idade_serie",
+    )
+}}
+-- Casts the staging columns to their published types; no filtering or joins are applied.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(etapa_ensino as string) as etapa_ensino,
+    safe_cast(tdi as float64) as tdi
+from
+    `basedosdados-staging.br_inep_educacao_especial_staging.brasil_distorcao_idade_serie`
+    as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql
new file mode 100644
index 00000000..953b4c0b
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql
@@ -0,0 +1,16 @@
+{{
+    config(
+        materialized="table",
+        schema="br_inep_educacao_especial",
+        alias="brasil_taxa_rendimento",
+    )
+}}
+-- Casts the staging columns to their published types; no filtering or joins are applied.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(etapa_ensino as string) as etapa_ensino,
+    safe_cast(taxa_aprovacao as float64) as taxa_aprovacao,
+    safe_cast(taxa_reprovacao as float64) as taxa_reprovacao,
+    safe_cast(taxa_abandono as float64) as taxa_abandono
+from
+    `basedosdados-staging.br_inep_educacao_especial_staging.brasil_taxa_rendimento` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql
new file mode 100644
index 00000000..45dc6d5b
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql
@@ -0,0 +1,21 @@
+{{
+    config(
+        materialized="table",
+        schema="br_inep_educacao_especial",
+        alias="docente_aee",
+    )
+}}
+-- Casts the staging columns to their published types; no filtering or joins are applied.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(id_municipio as string) as id_municipio,
+    safe_cast(quantidade_docente_regente as numeric) as quantidade_docente_regente,
+    safe_cast(quantidade_docente_aee as numeric) as quantidade_docente_aee,
+    safe_cast(
+        quantidade_docente_regente_formacao_continuada as int64
+    ) as quantidade_docente_regente_formacao_continuada,
+    safe_cast(
+        quantidade_docente_aee_formacao_continuada as int64
+    ) as quantidade_docente_aee_formacao_continuada
+from `basedosdados-staging.br_inep_educacao_especial_staging.docente_aee` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql
new file mode 100644
index 00000000..ba4334ed
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql
@@ -0,0 +1,18 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="docente_formacao",
+        materialized="table",
+    )
+}}
+-- Typed copy of the staging table; safe_cast yields NULL when a value cannot be converted.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(id_municipio as string) as id_municipio,
+    safe_cast(rede as string) as rede,
+    safe_cast(
+        quantidade_docente_formacao_continuada as numeric
+    ) as quantidade_docente_formacao_continuada
+
+from `basedosdados-staging.br_inep_educacao_especial_staging.docente_formacao` as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql
similarity index 75%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql
rename to models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql
index 09c49394..2acac0d8 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql
@@ -1,7 +1,7 @@
{{
config(
- alias="educacao_especial_etapa_ensino",
- schema="br_inep_sinopse_estatistica_educacao_basica",
+ alias="etapa_ensino",
+ schema="br_inep_educacao_especial",
materialized="table",
partition_by={
"field": "ano",
@@ -24,6 +24,4 @@ select
end as string
) etapa_ensino,
safe_cast(quantidade_matricula as numeric) quantidade_matricula
-from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_etapa_ensino`
- as t
+from `basedosdados-staging.br_inep_educacao_especial_staging.etapa_ensino` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql
new file mode 100644
index 00000000..f4073d74
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql
@@ -0,0 +1,21 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="faixa_etaria",
+        materialized="table",
+        cluster_by="sigla_uf",
+        partition_by={
+            "data_type": "int64",
+            "field": "ano",
+            "range": {"start": 2007, "end": 2023, "interval": 1},
+        },
+    )
+}}
+select  -- NOTE(review): range end 2023 is exclusive in BigQuery; confirm no ano >= 2023 is expected.
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(id_municipio as string) as id_municipio,
+    safe_cast(tipo_classe as string) as tipo_classe,
+    safe_cast(faixa_etaria as string) as faixa_etaria,
+    safe_cast(quantidade_matricula as numeric) as quantidade_matricula
+from `basedosdados-staging.br_inep_educacao_especial_staging.faixa_etaria` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql
new file mode 100644
index 00000000..cf12893a
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql
@@ -0,0 +1,22 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="localizacao",
+        materialized="table",
+        cluster_by="sigla_uf",
+        partition_by={
+            "data_type": "int64",
+            "field": "ano",
+            "range": {"start": 2007, "end": 2023, "interval": 1},
+        },
+    )
+}}
+select  -- NOTE(review): range end 2023 is exclusive in BigQuery; confirm no ano >= 2023 is expected.
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(id_municipio as string) as id_municipio,
+    safe_cast(tipo_classe as string) as tipo_classe,
+    safe_cast(rede as string) as rede,
+    safe_cast(localizacao as string) as localizacao,
+    safe_cast(quantidade_matricula as numeric) as quantidade_matricula
+from `basedosdados-staging.br_inep_educacao_especial_staging.localizacao` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql
new file mode 100644
index 00000000..b15b5053
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql
@@ -0,0 +1,16 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="matricula_aee",
+        materialized="table",
+    )
+}}
+-- Typed copy of the staging table; safe_cast yields NULL when a value cannot be converted.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(rede as string) as rede,
+    safe_cast(quantidade_matricula as numeric) as quantidade_matricula,
+    safe_cast(quantidade_matricula_aee as numeric) as quantidade_matricula_aee
+
+from `basedosdados-staging.br_inep_educacao_especial_staging.matricula_aee` as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql
similarity index 73%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql
rename to models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql
index 96b61508..c9e3b242 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql
@@ -1,7 +1,7 @@
{{
config(
- alias="educacao_especial_sexo_raca_cor",
- schema="br_inep_sinopse_estatistica_educacao_basica",
+ alias="sexo_raca_cor",
+ schema="br_inep_educacao_especial",
materialized="table",
partition_by={
"field": "ano",
@@ -21,6 +21,4 @@ select
case when raca_cor = 'Fmarela' then 'Amarela' else raca_cor end as string
) raca_cor,
safe_cast(quantidade_matricula as numeric) quantidade_matricula,
-from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_sexo_raca_cor`
- as t
+from `basedosdados-staging.br_inep_educacao_especial_staging.sexo_raca_cor` as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql
similarity index 71%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql
rename to models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql
index 8de1398b..17158c1b 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql
@@ -1,7 +1,7 @@
{{
config(
- alias="educacao_especial_tempo_ensino",
- schema="br_inep_sinopse_estatistica_educacao_basica",
+ alias="tempo_ensino",
+ schema="br_inep_educacao_especial",
materialized="table",
partition_by={
"field": "ano",
@@ -19,6 +19,4 @@ select
safe_cast(rede as string) rede,
safe_cast(tempo_ensino as string) tempo_ensino,
safe_cast(quantidade_matricula as numeric) quantidade_matricula,
-from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_tempo_ensino`
- as t
+from `basedosdados-staging.br_inep_educacao_especial_staging.tempo_ensino` as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql
similarity index 75%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql
rename to models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql
index 988decef..a98caf1a 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql
@@ -1,7 +1,7 @@
{{
config(
- alias="educacao_especial_tipo_deficiencia",
- schema="br_inep_sinopse_estatistica_educacao_basica",
+ alias="tipo_deficiencia",
+ schema="br_inep_educacao_especial",
materialized="table",
partition_by={
"field": "ano",
@@ -24,6 +24,4 @@ select
end as string
) tipo_deficiencia,
safe_cast(quantidade_matricula as numeric) quantidade_matricula,
-from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_tipo_deficiencia`
- as t
+from `basedosdados-staging.br_inep_educacao_especial_staging.tipo_deficiencia` as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql
new file mode 100644
index 00000000..a4c93bb0
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql
@@ -0,0 +1,16 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="uf_distorcao_idade_serie",
+        materialized="table",
+    )
+}}
+-- Typed copy of the staging table; safe_cast yields NULL when a value cannot be converted.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(etapa_ensino as string) as etapa_ensino,
+    safe_cast(tdi as float64) as tdi
+from
+    `basedosdados-staging.br_inep_educacao_especial_staging.uf_distorcao_idade_serie`
+    as t
diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql
new file mode 100644
index 00000000..4c8e81d2
--- /dev/null
+++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql
@@ -0,0 +1,16 @@
+{{
+    config(
+        schema="br_inep_educacao_especial",
+        alias="uf_taxa_rendimento",
+        materialized="table",
+    )
+}}
+-- Typed copy of the staging table; safe_cast yields NULL when a value cannot be converted.
+select
+    safe_cast(ano as int64) as ano,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(etapa_ensino as string) as etapa_ensino,
+    safe_cast(taxa_aprovacao as float64) as taxa_aprovacao,
+    safe_cast(taxa_reprovacao as float64) as taxa_reprovacao,
+    safe_cast(taxa_abandono as float64) as taxa_abandono
+from `basedosdados-staging.br_inep_educacao_especial_staging.uf_taxa_rendimento` as t
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb
new file mode 100644
index 00000000..8216a77c
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb
@@ -0,0 +1,4579 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "def read_sheet(sheet_name: str, skiprows: int = 3) -> pd.DataFrame:\n",
+    "    \"\"\"Read a single worksheet of the TDI workbook stored under INPUT.\n",
+    "\n",
+    "    `sheet_name` selects the worksheet; `skiprows` (default 3) is forwarded to\n",
+    "    `pd.read_excel` so that many leading spreadsheet rows are skipped.\n",
+    "    \"\"\"\n",
+    "    workbook = os.path.join(INPUT, \"TDI_-_Alunos_com_defici_ncia.xlsx\")\n",
+    "    frame = pd.read_excel(workbook, sheet_name=sheet_name, skiprows=skiprows)\n",
+    "    return frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Open the TDI workbook from INPUT once so its worksheets can be enumerated.\n",
+    "excel_data = pd.ExcelFile(\n",
+    "    os.path.join(INPUT, \"TDI_-_Alunos_com_defici_ncia.xlsx\")\n",
+    ")\n",
+    "\n",
+    "# Names of the worksheets found in the workbook.\n",
+    "sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Parse every worksheet with read_sheet, keyed by its sheet name.\n",
+    "dfs = dict(\n",
+    "    zip(sheet_names, map(read_sheet, sheet_names))\n",
+    ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'2007': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 72.2 60.6 \n",
+ " 1 Norte 76.1 69.7 \n",
+ " 2 Nordeste 74.4 64.7 \n",
+ " 3 Sudeste 69.3 56.5 \n",
+ " 4 Sul 73.3 59.6 \n",
+ " 5 Centro-Oeste 78.7 73.6 \n",
+ " 6 Rondônia 72.8 65.8 \n",
+ " 7 Acre 67.2 60.0 \n",
+ " 8 Amazonas 76.3 75.7 \n",
+ " 9 Roraima 71.0 69.5 \n",
+ " 10 Pará 78.2 68.4 \n",
+ " 11 Amapá 76.4 81.0 \n",
+ " 12 Tocantins 75.4 70.7 \n",
+ " 13 Maranhão 74.2 61.2 \n",
+ " 14 Piauí 83.0 69.3 \n",
+ " 15 Ceará 67.8 56.9 \n",
+ " 16 Rio Grande do Norte 66.4 74.0 \n",
+ " 17 Paraíba 73.3 71.6 \n",
+ " 18 Pernambuco 78.6 64.5 \n",
+ " 19 Alagoas 72.0 79.2 \n",
+ " 20 Sergipe 85.0 85.4 \n",
+ " 21 Bahia 76.7 66.5 \n",
+ " 22 Minas Gerais 79.0 69.1 \n",
+ " 23 Espírito Santo 67.5 69.0 \n",
+ " 24 Rio de Janeiro 85.1 77.2 \n",
+ " 25 São Paulo 60.7 50.2 \n",
+ " 26 Paraná 72.3 54.2 \n",
+ " 27 Santa Catarina 75.8 65.3 \n",
+ " 28 Rio Grande do Sul 73.6 66.1 \n",
+ " 29 Mato Grosso do Sul 86.0 81.2 \n",
+ " 30 Mato Grosso 76.3 66.2 \n",
+ " 31 Goiás 76.8 75.7 \n",
+ " 32 Distrito Federal 74.6 70.6 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 65.3 \n",
+ " 1 75.7 \n",
+ " 2 77.9 \n",
+ " 3 58.4 \n",
+ " 4 62.6 \n",
+ " 5 73.6 \n",
+ " 6 76.2 \n",
+ " 7 60.5 \n",
+ " 8 75.5 \n",
+ " 9 76.9 \n",
+ " 10 77.7 \n",
+ " 11 69.4 \n",
+ " 12 75.9 \n",
+ " 13 76.2 \n",
+ " 14 80.5 \n",
+ " 15 83.7 \n",
+ " 16 78.0 \n",
+ " 17 88.0 \n",
+ " 18 81.3 \n",
+ " 19 88.3 \n",
+ " 20 85.7 \n",
+ " 21 67.1 \n",
+ " 22 69.1 \n",
+ " 23 69.1 \n",
+ " 24 86.7 \n",
+ " 25 51.5 \n",
+ " 26 59.5 \n",
+ " 27 71.9 \n",
+ " 28 60.4 \n",
+ " 29 75.4 \n",
+ " 30 78.1 \n",
+ " 31 75.5 \n",
+ " 32 69.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2008': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 66.5 54.5 \n",
+ " 1 Norte 61.2 44.8 \n",
+ " 2 Nordeste 60.7 45.7 \n",
+ " 3 Sudeste 65.7 53.5 \n",
+ " 4 Sul 73.5 63.2 \n",
+ " 5 Centro-Oeste 72.3 63.6 \n",
+ " 6 Rondônia 55.1 35.9 \n",
+ " 7 Acre 43.8 29.7 \n",
+ " 8 Amazonas 58.0 57.0 \n",
+ " 9 Roraima 44.2 28.4 \n",
+ " 10 Pará 65.1 48.3 \n",
+ " 11 Amapá 57.0 54.4 \n",
+ " 12 Tocantins 67.1 46.3 \n",
+ " 13 Maranhão 62.8 44.3 \n",
+ " 14 Piauí 66.3 57.9 \n",
+ " 15 Ceará 55.9 38.6 \n",
+ " 16 Rio Grande do Norte 47.1 45.2 \n",
+ " 17 Paraíba 54.0 48.1 \n",
+ " 18 Pernambuco 73.5 58.6 \n",
+ " 19 Alagoas 49.0 46.1 \n",
+ " 20 Sergipe 68.5 58.9 \n",
+ " 21 Bahia 60.5 44.4 \n",
+ " 22 Minas Gerais 73.1 61.8 \n",
+ " 23 Espírito Santo 62.9 61.2 \n",
+ " 24 Rio de Janeiro 82.7 72.7 \n",
+ " 25 São Paulo 57.5 48.2 \n",
+ " 26 Paraná 78.0 60.6 \n",
+ " 27 Santa Catarina 69.5 64.5 \n",
+ " 28 Rio Grande do Sul 70.1 65.0 \n",
+ " 29 Mato Grosso do Sul 84.6 75.7 \n",
+ " 30 Mato Grosso 72.6 66.2 \n",
+ " 31 Goiás 68.0 62.1 \n",
+ " 32 Distrito Federal 62.0 60.0 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 57.3 \n",
+ " 1 50.7 \n",
+ " 2 49.4 \n",
+ " 3 56.6 \n",
+ " 4 65.0 \n",
+ " 5 60.5 \n",
+ " 6 48.8 \n",
+ " 7 47.2 \n",
+ " 8 62.2 \n",
+ " 9 41.2 \n",
+ " 10 48.7 \n",
+ " 11 60.0 \n",
+ " 12 48.2 \n",
+ " 13 45.3 \n",
+ " 14 51.0 \n",
+ " 15 48.7 \n",
+ " 16 44.2 \n",
+ " 17 52.3 \n",
+ " 18 65.1 \n",
+ " 19 61.1 \n",
+ " 20 65.7 \n",
+ " 21 41.9 \n",
+ " 22 58.6 \n",
+ " 23 73.5 \n",
+ " 24 80.2 \n",
+ " 25 51.5 \n",
+ " 26 59.6 \n",
+ " 27 72.5 \n",
+ " 28 64.0 \n",
+ " 29 80.0 \n",
+ " 30 72.7 \n",
+ " 31 55.8 \n",
+ " 32 60.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2009': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 68.4 54.8 \n",
+ " 1 Norte 69.8 51.0 \n",
+ " 2 Nordeste 66.9 50.7 \n",
+ " 3 Sudeste 65.6 52.1 \n",
+ " 4 Sul 75.8 63.8 \n",
+ " 5 Centro-Oeste 73.1 65.6 \n",
+ " 6 Rondônia 64.2 47.3 \n",
+ " 7 Acre 51.6 37.8 \n",
+ " 8 Amazonas 70.6 56.7 \n",
+ " 9 Roraima 50.6 51.7 \n",
+ " 10 Pará 74.2 54.0 \n",
+ " 11 Amapá 64.5 49.2 \n",
+ " 12 Tocantins 72.8 50.5 \n",
+ " 13 Maranhão 64.4 48.8 \n",
+ " 14 Piauí 70.6 47.2 \n",
+ " 15 Ceará 64.3 44.5 \n",
+ " 16 Rio Grande do Norte 53.4 57.9 \n",
+ " 17 Paraíba 70.4 47.9 \n",
+ " 18 Pernambuco 78.3 64.5 \n",
+ " 19 Alagoas 54.1 58.9 \n",
+ " 20 Sergipe 78.2 61.1 \n",
+ " 21 Bahia 65.5 50.2 \n",
+ " 22 Minas Gerais 75.4 64.4 \n",
+ " 23 Espírito Santo 69.9 60.2 \n",
+ " 24 Rio de Janeiro 84.6 74.3 \n",
+ " 25 São Paulo 57.0 47.3 \n",
+ " 26 Paraná 80.6 61.1 \n",
+ " 27 Santa Catarina 57.4 61.8 \n",
+ " 28 Rio Grande do Sul 75.0 67.9 \n",
+ " 29 Mato Grosso do Sul 84.1 78.9 \n",
+ " 30 Mato Grosso 73.2 69.7 \n",
+ " 31 Goiás 65.7 61.2 \n",
+ " 32 Distrito Federal 66.4 64.6 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.0 \n",
+ " 1 57.5 \n",
+ " 2 56.6 \n",
+ " 3 52.1 \n",
+ " 4 62.1 \n",
+ " 5 62.2 \n",
+ " 6 45.1 \n",
+ " 7 43.4 \n",
+ " 8 61.4 \n",
+ " 9 21.4 \n",
+ " 10 62.1 \n",
+ " 11 54.7 \n",
+ " 12 55.2 \n",
+ " 13 48.2 \n",
+ " 14 61.4 \n",
+ " 15 52.6 \n",
+ " 16 51.1 \n",
+ " 17 50.2 \n",
+ " 18 69.3 \n",
+ " 19 64.1 \n",
+ " 20 68.9 \n",
+ " 21 54.4 \n",
+ " 22 59.9 \n",
+ " 23 69.9 \n",
+ " 24 74.8 \n",
+ " 25 46.0 \n",
+ " 26 57.4 \n",
+ " 27 57.4 \n",
+ " 28 70.3 \n",
+ " 29 80.6 \n",
+ " 30 70.9 \n",
+ " 31 56.2 \n",
+ " 32 67.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2010': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 67.5 57.2 \n",
+ " 1 Norte 70.0 59.4 \n",
+ " 2 Nordeste 67.1 57.8 \n",
+ " 3 Sudeste 65.0 52.8 \n",
+ " 4 Sul 72.4 62.8 \n",
+ " 5 Centro-Oeste 68.6 67.2 \n",
+ " 6 Rondônia 67.5 60.7 \n",
+ " 7 Acre 57.2 49.4 \n",
+ " 8 Amazonas 70.7 64.2 \n",
+ " 9 Roraima 50.6 59.2 \n",
+ " 10 Pará 74.9 62.5 \n",
+ " 11 Amapá 66.7 54.7 \n",
+ " 12 Tocantins 68.6 57.5 \n",
+ " 13 Maranhão 65.2 55.5 \n",
+ " 14 Piauí 69.2 56.1 \n",
+ " 15 Ceará 64.2 50.7 \n",
+ " 16 Rio Grande do Norte 56.2 66.8 \n",
+ " 17 Paraíba 71.7 56.5 \n",
+ " 18 Pernambuco 74.9 65.6 \n",
+ " 19 Alagoas 57.6 60.2 \n",
+ " 20 Sergipe 78.2 72.2 \n",
+ " 21 Bahia 67.4 59.3 \n",
+ " 22 Minas Gerais 69.3 66.1 \n",
+ " 23 Espírito Santo 59.1 59.0 \n",
+ " 24 Rio de Janeiro 82.2 73.3 \n",
+ " 25 São Paulo 58.4 47.4 \n",
+ " 26 Paraná 76.0 59.0 \n",
+ " 27 Santa Catarina 54.1 62.1 \n",
+ " 28 Rio Grande do Sul 73.0 67.8 \n",
+ " 29 Mato Grosso do Sul 81.2 78.7 \n",
+ " 30 Mato Grosso 64.6 66.7 \n",
+ " 31 Goiás 62.2 66.2 \n",
+ " 32 Distrito Federal 62.7 61.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.4 \n",
+ " 1 63.0 \n",
+ " 2 60.3 \n",
+ " 3 52.1 \n",
+ " 4 55.6 \n",
+ " 5 64.6 \n",
+ " 6 45.9 \n",
+ " 7 56.6 \n",
+ " 8 70.9 \n",
+ " 9 58.6 \n",
+ " 10 70.8 \n",
+ " 11 54.1 \n",
+ " 12 56.0 \n",
+ " 13 56.7 \n",
+ " 14 65.9 \n",
+ " 15 50.6 \n",
+ " 16 67.4 \n",
+ " 17 55.6 \n",
+ " 18 69.9 \n",
+ " 19 69.1 \n",
+ " 20 80.1 \n",
+ " 21 57.6 \n",
+ " 22 58.9 \n",
+ " 23 60.1 \n",
+ " 24 67.7 \n",
+ " 25 47.3 \n",
+ " 26 51.1 \n",
+ " 27 53.0 \n",
+ " 28 62.9 \n",
+ " 29 75.6 \n",
+ " 30 69.6 \n",
+ " 31 63.2 \n",
+ " 32 61.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2011': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 66.5 58.5 \n",
+ " 1 Norte 69.1 63.2 \n",
+ " 2 Nordeste 66.9 61.9 \n",
+ " 3 Sudeste 64.7 53.6 \n",
+ " 4 Sul 69.1 60.6 \n",
+ " 5 Centro-Oeste 66.1 66.2 \n",
+ " 6 Rondônia 67.5 61.4 \n",
+ " 7 Acre 57.1 53.4 \n",
+ " 8 Amazonas 69.7 69.5 \n",
+ " 9 Roraima 49.4 65.6 \n",
+ " 10 Pará 73.4 66.5 \n",
+ " 11 Amapá 69.8 55.4 \n",
+ " 12 Tocantins 67.2 62.8 \n",
+ " 13 Maranhão 64.8 60.2 \n",
+ " 14 Piauí 67.1 61.0 \n",
+ " 15 Ceará 62.1 56.8 \n",
+ " 16 Rio Grande do Norte 57.6 68.9 \n",
+ " 17 Paraíba 71.2 60.8 \n",
+ " 18 Pernambuco 74.3 67.4 \n",
+ " 19 Alagoas 60.4 64.5 \n",
+ " 20 Sergipe 78.4 73.3 \n",
+ " 21 Bahia 67.7 61.6 \n",
+ " 22 Minas Gerais 65.1 65.3 \n",
+ " 23 Espírito Santo 54.4 58.0 \n",
+ " 24 Rio de Janeiro 79.6 72.8 \n",
+ " 25 São Paulo 60.4 47.5 \n",
+ " 26 Paraná 73.5 56.1 \n",
+ " 27 Santa Catarina 50.3 60.1 \n",
+ " 28 Rio Grande do Sul 69.8 66.4 \n",
+ " 29 Mato Grosso do Sul 78.4 79.3 \n",
+ " 30 Mato Grosso 58.8 62.5 \n",
+ " 31 Goiás 60.5 65.1 \n",
+ " 32 Distrito Federal 63.0 61.0 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.1 \n",
+ " 1 62.9 \n",
+ " 2 59.9 \n",
+ " 3 52.5 \n",
+ " 4 54.3 \n",
+ " 5 62.5 \n",
+ " 6 51.0 \n",
+ " 7 51.2 \n",
+ " 8 68.9 \n",
+ " 9 48.1 \n",
+ " 10 69.1 \n",
+ " 11 57.7 \n",
+ " 12 62.9 \n",
+ " 13 54.6 \n",
+ " 14 63.0 \n",
+ " 15 57.7 \n",
+ " 16 68.8 \n",
+ " 17 50.5 \n",
+ " 18 65.9 \n",
+ " 19 62.8 \n",
+ " 20 73.5 \n",
+ " 21 57.1 \n",
+ " 22 59.8 \n",
+ " 23 59.1 \n",
+ " 24 66.3 \n",
+ " 25 47.4 \n",
+ " 26 50.7 \n",
+ " 27 51.3 \n",
+ " 28 61.2 \n",
+ " 29 68.9 \n",
+ " 30 64.6 \n",
+ " 31 63.8 \n",
+ " 32 57.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2012': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 64.4 58.9 \n",
+ " 1 Norte 67.0 64.7 \n",
+ " 2 Nordeste 66.2 63.7 \n",
+ " 3 Sudeste 62.0 53.1 \n",
+ " 4 Sul 67.1 61.4 \n",
+ " 5 Centro-Oeste 62.5 64.3 \n",
+ " 6 Rondônia 67.0 60.8 \n",
+ " 7 Acre 54.5 58.5 \n",
+ " 8 Amazonas 66.0 72.1 \n",
+ " 9 Roraima 43.5 62.5 \n",
+ " 10 Pará 70.2 67.7 \n",
+ " 11 Amapá 68.3 61.9 \n",
+ " 12 Tocantins 68.3 62.9 \n",
+ " 13 Maranhão 62.6 60.3 \n",
+ " 14 Piauí 68.5 61.3 \n",
+ " 15 Ceará 57.8 59.4 \n",
+ " 16 Rio Grande do Norte 57.6 69.9 \n",
+ " 17 Paraíba 69.3 61.9 \n",
+ " 18 Pernambuco 74.2 69.6 \n",
+ " 19 Alagoas 61.7 66.2 \n",
+ " 20 Sergipe 79.5 75.4 \n",
+ " 21 Bahia 68.0 64.0 \n",
+ " 22 Minas Gerais 57.8 63.3 \n",
+ " 23 Espírito Santo 50.4 59.4 \n",
+ " 24 Rio de Janeiro 78.8 71.8 \n",
+ " 25 São Paulo 59.6 46.6 \n",
+ " 26 Paraná 73.8 56.9 \n",
+ " 27 Santa Catarina 41.5 59.9 \n",
+ " 28 Rio Grande do Sul 67.0 67.6 \n",
+ " 29 Mato Grosso do Sul 77.2 78.6 \n",
+ " 30 Mato Grosso 47.1 57.6 \n",
+ " 31 Goiás 56.6 63.0 \n",
+ " 32 Distrito Federal 62.6 59.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 55.3 \n",
+ " 1 62.9 \n",
+ " 2 60.9 \n",
+ " 3 51.4 \n",
+ " 4 51.8 \n",
+ " 5 61.5 \n",
+ " 6 53.9 \n",
+ " 7 52.0 \n",
+ " 8 66.0 \n",
+ " 9 57.5 \n",
+ " 10 70.9 \n",
+ " 11 52.5 \n",
+ " 12 61.0 \n",
+ " 13 57.8 \n",
+ " 14 58.3 \n",
+ " 15 59.0 \n",
+ " 16 67.7 \n",
+ " 17 52.1 \n",
+ " 18 67.1 \n",
+ " 19 63.5 \n",
+ " 20 73.7 \n",
+ " 21 59.8 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 60.5 \n",
+ " 25 46.6 \n",
+ " 26 46.9 \n",
+ " 27 49.9 \n",
+ " 28 59.4 \n",
+ " 29 69.8 \n",
+ " 30 57.5 \n",
+ " 31 64.7 \n",
+ " 32 53.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2013': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 61.8 59.3 \n",
+ " 1 Norte 65.1 65.2 \n",
+ " 2 Nordeste 63.8 66.2 \n",
+ " 3 Sudeste 59.5 52.2 \n",
+ " 4 Sul 63.0 63.8 \n",
+ " 5 Centro-Oeste 60.1 61.6 \n",
+ " 6 Rondônia 66.3 62.2 \n",
+ " 7 Acre 55.8 56.4 \n",
+ " 8 Amazonas 63.2 66.0 \n",
+ " 9 Roraima 38.4 58.2 \n",
+ " 10 Pará 69.1 69.9 \n",
+ " 11 Amapá 64.4 60.6 \n",
+ " 12 Tocantins 63.3 66.0 \n",
+ " 13 Maranhão 60.5 61.9 \n",
+ " 14 Piauí 66.0 66.3 \n",
+ " 15 Ceará 50.2 63.1 \n",
+ " 16 Rio Grande do Norte 56.4 72.8 \n",
+ " 17 Paraíba 68.0 67.2 \n",
+ " 18 Pernambuco 73.1 71.1 \n",
+ " 19 Alagoas 61.3 65.7 \n",
+ " 20 Sergipe 76.4 77.6 \n",
+ " 21 Bahia 66.8 65.8 \n",
+ " 22 Minas Gerais 52.4 60.5 \n",
+ " 23 Espírito Santo 47.8 61.8 \n",
+ " 24 Rio de Janeiro 77.1 74.3 \n",
+ " 25 São Paulo 58.4 44.8 \n",
+ " 26 Paraná 70.7 59.6 \n",
+ " 27 Santa Catarina 35.1 59.7 \n",
+ " 28 Rio Grande do Sul 62.8 70.3 \n",
+ " 29 Mato Grosso do Sul 76.4 75.2 \n",
+ " 30 Mato Grosso 41.9 47.7 \n",
+ " 31 Goiás 53.5 64.7 \n",
+ " 32 Distrito Federal 63.5 58.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 55.4 \n",
+ " 1 62.0 \n",
+ " 2 61.9 \n",
+ " 3 51.0 \n",
+ " 4 52.9 \n",
+ " 5 61.4 \n",
+ " 6 54.1 \n",
+ " 7 48.6 \n",
+ " 8 63.9 \n",
+ " 9 60.3 \n",
+ " 10 70.6 \n",
+ " 11 54.4 \n",
+ " 12 60.4 \n",
+ " 13 57.8 \n",
+ " 14 63.2 \n",
+ " 15 60.9 \n",
+ " 16 65.5 \n",
+ " 17 56.9 \n",
+ " 18 71.0 \n",
+ " 19 55.5 \n",
+ " 20 72.9 \n",
+ " 21 60.0 \n",
+ " 22 61.1 \n",
+ " 23 51.7 \n",
+ " 24 63.1 \n",
+ " 25 46.1 \n",
+ " 26 48.2 \n",
+ " 27 51.8 \n",
+ " 28 59.4 \n",
+ " 29 67.8 \n",
+ " 30 60.5 \n",
+ " 31 65.1 \n",
+ " 32 53.9 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2014': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 58.7 59.6 \n",
+ " 1 Norte 60.7 65.6 \n",
+ " 2 Nordeste 60.4 68.1 \n",
+ " 3 Sudeste 56.4 51.9 \n",
+ " 4 Sul 61.2 64.5 \n",
+ " 5 Centro-Oeste 56.0 58.6 \n",
+ " 6 Rondônia 57.7 63.7 \n",
+ " 7 Acre 53.6 55.5 \n",
+ " 8 Amazonas 61.6 67.2 \n",
+ " 9 Roraima 35.5 54.8 \n",
+ " 10 Pará 64.8 72.0 \n",
+ " 11 Amapá 60.4 60.3 \n",
+ " 12 Tocantins 56.9 65.2 \n",
+ " 13 Maranhão 59.0 63.6 \n",
+ " 14 Piauí 62.1 68.2 \n",
+ " 15 Ceará 42.5 63.9 \n",
+ " 16 Rio Grande do Norte 52.5 74.8 \n",
+ " 17 Paraíba 65.0 69.4 \n",
+ " 18 Pernambuco 71.2 72.7 \n",
+ " 19 Alagoas 60.1 69.4 \n",
+ " 20 Sergipe 71.9 81.3 \n",
+ " 21 Bahia 64.4 67.9 \n",
+ " 22 Minas Gerais 46.0 56.6 \n",
+ " 23 Espírito Santo 45.6 63.0 \n",
+ " 24 Rio de Janeiro 75.3 73.7 \n",
+ " 25 São Paulo 56.1 44.9 \n",
+ " 26 Paraná 73.2 61.2 \n",
+ " 27 Santa Catarina 30.0 57.5 \n",
+ " 28 Rio Grande do Sul 58.9 70.9 \n",
+ " 29 Mato Grosso do Sul 75.1 73.4 \n",
+ " 30 Mato Grosso 35.0 37.8 \n",
+ " 31 Goiás 46.7 62.0 \n",
+ " 32 Distrito Federal 62.9 61.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 54.5 \n",
+ " 1 58.9 \n",
+ " 2 64.7 \n",
+ " 3 49.3 \n",
+ " 4 52.4 \n",
+ " 5 61.0 \n",
+ " 6 52.9 \n",
+ " 7 49.4 \n",
+ " 8 67.0 \n",
+ " 9 64.4 \n",
+ " 10 63.8 \n",
+ " 11 57.8 \n",
+ " 12 60.8 \n",
+ " 13 61.1 \n",
+ " 14 63.6 \n",
+ " 15 62.2 \n",
+ " 16 69.0 \n",
+ " 17 60.3 \n",
+ " 18 68.6 \n",
+ " 19 61.3 \n",
+ " 20 74.1 \n",
+ " 21 67.8 \n",
+ " 22 60.5 \n",
+ " 23 53.9 \n",
+ " 24 63.1 \n",
+ " 25 43.5 \n",
+ " 26 48.4 \n",
+ " 27 50.4 \n",
+ " 28 59.0 \n",
+ " 29 66.4 \n",
+ " 30 58.7 \n",
+ " 31 64.4 \n",
+ " 32 54.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2015': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 55.8 59.2 \n",
+ " 1 Norte 57.8 66.0 \n",
+ " 2 Nordeste 57.2 67.6 \n",
+ " 3 Sudeste 53.8 51.1 \n",
+ " 4 Sul 57.6 63.6 \n",
+ " 5 Centro-Oeste 53.3 56.2 \n",
+ " 6 Rondônia 53.0 62.4 \n",
+ " 7 Acre 50.5 54.1 \n",
+ " 8 Amazonas 59.2 65.1 \n",
+ " 9 Roraima 32.1 54.7 \n",
+ " 10 Pará 62.8 73.7 \n",
+ " 11 Amapá 55.7 61.7 \n",
+ " 12 Tocantins 53.1 64.9 \n",
+ " 13 Maranhão 55.3 63.7 \n",
+ " 14 Piauí 60.1 67.5 \n",
+ " 15 Ceará 36.3 60.4 \n",
+ " 16 Rio Grande do Norte 49.0 74.2 \n",
+ " 17 Paraíba 62.9 70.4 \n",
+ " 18 Pernambuco 69.3 73.1 \n",
+ " 19 Alagoas 58.1 69.9 \n",
+ " 20 Sergipe 68.2 81.0 \n",
+ " 21 Bahia 62.7 68.6 \n",
+ " 22 Minas Gerais 41.7 53.1 \n",
+ " 23 Espírito Santo 43.0 62.6 \n",
+ " 24 Rio de Janeiro 72.9 72.3 \n",
+ " 25 São Paulo 54.5 44.4 \n",
+ " 26 Paraná 69.7 61.2 \n",
+ " 27 Santa Catarina 26.5 54.1 \n",
+ " 28 Rio Grande do Sul 56.0 70.5 \n",
+ " 29 Mato Grosso do Sul 73.1 69.4 \n",
+ " 30 Mato Grosso 30.8 34.0 \n",
+ " 31 Goiás 42.7 60.6 \n",
+ " 32 Distrito Federal 62.1 59.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.7 \n",
+ " 1 58.4 \n",
+ " 2 64.4 \n",
+ " 3 48.3 \n",
+ " 4 52.0 \n",
+ " 5 58.3 \n",
+ " 6 53.3 \n",
+ " 7 49.6 \n",
+ " 8 68.1 \n",
+ " 9 54.5 \n",
+ " 10 65.6 \n",
+ " 11 55.5 \n",
+ " 12 58.9 \n",
+ " 13 61.0 \n",
+ " 14 65.6 \n",
+ " 15 62.6 \n",
+ " 16 69.1 \n",
+ " 17 60.4 \n",
+ " 18 66.0 \n",
+ " 19 57.8 \n",
+ " 20 73.7 \n",
+ " 21 67.6 \n",
+ " 22 60.9 \n",
+ " 23 54.2 \n",
+ " 24 62.0 \n",
+ " 25 41.7 \n",
+ " 26 48.0 \n",
+ " 27 50.1 \n",
+ " 28 58.0 \n",
+ " 29 67.2 \n",
+ " 30 52.3 \n",
+ " 31 61.6 \n",
+ " 32 52.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2016': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 53.5 58.2 \n",
+ " 1 Norte 55.7 65.6 \n",
+ " 2 Nordeste 54.3 66.9 \n",
+ " 3 Sudeste 51.4 49.7 \n",
+ " 4 Sul 56.9 61.2 \n",
+ " 5 Centro-Oeste 51.0 55.3 \n",
+ " 6 Rondônia 47.2 62.7 \n",
+ " 7 Acre 46.8 55.5 \n",
+ " 8 Amazonas 57.7 63.3 \n",
+ " 9 Roraima 28.9 52.9 \n",
+ " 10 Pará 61.3 73.2 \n",
+ " 11 Amapá 54.4 62.0 \n",
+ " 12 Tocantins 52.5 62.7 \n",
+ " 13 Maranhão 53.1 64.2 \n",
+ " 14 Piauí 57.3 67.5 \n",
+ " 15 Ceará 30.9 55.6 \n",
+ " 16 Rio Grande do Norte 45.4 72.9 \n",
+ " 17 Paraíba 60.3 71.4 \n",
+ " 18 Pernambuco 65.8 74.0 \n",
+ " 19 Alagoas 56.1 70.7 \n",
+ " 20 Sergipe 66.1 79.4 \n",
+ " 21 Bahia 61.4 69.9 \n",
+ " 22 Minas Gerais 39.3 50.9 \n",
+ " 23 Espírito Santo 40.7 59.8 \n",
+ " 24 Rio de Janeiro 70.1 70.2 \n",
+ " 25 São Paulo 51.9 43.3 \n",
+ " 26 Paraná 69.5 59.3 \n",
+ " 27 Santa Catarina 24.5 47.9 \n",
+ " 28 Rio Grande do Sul 54.9 68.4 \n",
+ " 29 Mato Grosso do Sul 71.9 67.3 \n",
+ " 30 Mato Grosso 30.0 31.1 \n",
+ " 31 Goiás 40.0 60.1 \n",
+ " 32 Distrito Federal 59.7 59.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.9 \n",
+ " 1 60.4 \n",
+ " 2 64.6 \n",
+ " 3 47.8 \n",
+ " 4 53.6 \n",
+ " 5 57.3 \n",
+ " 6 54.4 \n",
+ " 7 51.1 \n",
+ " 8 67.3 \n",
+ " 9 51.1 \n",
+ " 10 69.7 \n",
+ " 11 55.5 \n",
+ " 12 60.9 \n",
+ " 13 58.7 \n",
+ " 14 63.5 \n",
+ " 15 64.1 \n",
+ " 16 68.3 \n",
+ " 17 65.5 \n",
+ " 18 65.4 \n",
+ " 19 64.6 \n",
+ " 20 72.0 \n",
+ " 21 68.0 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 58.3 \n",
+ " 25 41.2 \n",
+ " 26 49.1 \n",
+ " 27 50.7 \n",
+ " 28 61.0 \n",
+ " 29 68.0 \n",
+ " 30 49.8 \n",
+ " 31 61.6 \n",
+ " 32 50.8 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2017': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 51.4 56.8 \n",
+ " 1 Norte 53.1 64.8 \n",
+ " 2 Nordeste 52.2 65.3 \n",
+ " 3 Sudeste 49.4 48.3 \n",
+ " 4 Sul 54.6 59.2 \n",
+ " 5 Centro-Oeste 48.2 52.5 \n",
+ " 6 Rondônia 42.7 60.5 \n",
+ " 7 Acre 45.1 54.0 \n",
+ " 8 Amazonas 54.5 62.9 \n",
+ " 9 Roraima 26.2 50.3 \n",
+ " 10 Pará 59.9 72.7 \n",
+ " 11 Amapá 53.8 60.4 \n",
+ " 12 Tocantins 44.1 61.6 \n",
+ " 13 Maranhão 50.5 63.3 \n",
+ " 14 Piauí 54.4 63.7 \n",
+ " 15 Ceará 28.4 50.8 \n",
+ " 16 Rio Grande do Norte 43.2 69.6 \n",
+ " 17 Paraíba 59.1 71.3 \n",
+ " 18 Pernambuco 62.6 73.2 \n",
+ " 19 Alagoas 55.1 69.8 \n",
+ " 20 Sergipe 62.3 78.4 \n",
+ " 21 Bahia 59.6 70.4 \n",
+ " 22 Minas Gerais 36.8 49.4 \n",
+ " 23 Espírito Santo 37.9 58.2 \n",
+ " 24 Rio de Janeiro 68.0 67.8 \n",
+ " 25 São Paulo 50.8 41.2 \n",
+ " 26 Paraná 66.0 58.0 \n",
+ " 27 Santa Catarina 23.0 44.4 \n",
+ " 28 Rio Grande do Sul 53.4 66.9 \n",
+ " 29 Mato Grosso do Sul 71.5 68.3 \n",
+ " 30 Mato Grosso 29.3 33.3 \n",
+ " 31 Goiás 37.3 54.6 \n",
+ " 32 Distrito Federal 56.8 56.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.1 \n",
+ " 1 59.9 \n",
+ " 2 61.8 \n",
+ " 3 46.7 \n",
+ " 4 54.9 \n",
+ " 5 55.5 \n",
+ " 6 52.2 \n",
+ " 7 48.1 \n",
+ " 8 64.1 \n",
+ " 9 51.2 \n",
+ " 10 69.8 \n",
+ " 11 54.6 \n",
+ " 12 59.5 \n",
+ " 13 56.2 \n",
+ " 14 59.9 \n",
+ " 15 60.8 \n",
+ " 16 67.9 \n",
+ " 17 66.9 \n",
+ " 18 65.2 \n",
+ " 19 52.6 \n",
+ " 20 75.2 \n",
+ " 21 65.7 \n",
+ " 22 57.4 \n",
+ " 23 51.5 \n",
+ " 24 58.6 \n",
+ " 25 39.6 \n",
+ " 26 51.2 \n",
+ " 27 50.5 \n",
+ " 28 62.0 \n",
+ " 29 65.6 \n",
+ " 30 46.4 \n",
+ " 31 60.5 \n",
+ " 32 50.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2018': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 48.4 55.0 \n",
+ " 1 Norte 49.9 62.6 \n",
+ " 2 Nordeste 48.9 63.4 \n",
+ " 3 Sudeste 46.6 46.8 \n",
+ " 4 Sul 52.5 57.0 \n",
+ " 5 Centro-Oeste 44.1 49.8 \n",
+ " 6 Rondônia 36.6 58.4 \n",
+ " 7 Acre 41.4 51.5 \n",
+ " 8 Amazonas 51.6 60.7 \n",
+ " 9 Roraima 25.2 45.0 \n",
+ " 10 Pará 57.7 70.7 \n",
+ " 11 Amapá 51.3 60.2 \n",
+ " 12 Tocantins 38.7 57.7 \n",
+ " 13 Maranhão 47.7 61.2 \n",
+ " 14 Piauí 50.9 62.2 \n",
+ " 15 Ceará 24.6 46.0 \n",
+ " 16 Rio Grande do Norte 41.0 67.5 \n",
+ " 17 Paraíba 57.0 70.7 \n",
+ " 18 Pernambuco 57.0 70.8 \n",
+ " 19 Alagoas 50.1 68.2 \n",
+ " 20 Sergipe 60.8 76.4 \n",
+ " 21 Bahia 56.8 69.9 \n",
+ " 22 Minas Gerais 34.0 48.1 \n",
+ " 23 Espírito Santo 34.5 55.4 \n",
+ " 24 Rio de Janeiro 64.7 66.6 \n",
+ " 25 São Paulo 48.3 38.8 \n",
+ " 26 Paraná 63.0 55.2 \n",
+ " 27 Santa Catarina 21.7 41.4 \n",
+ " 28 Rio Grande do Sul 51.9 65.4 \n",
+ " 29 Mato Grosso do Sul 67.4 67.6 \n",
+ " 30 Mato Grosso 27.9 34.6 \n",
+ " 31 Goiás 32.9 50.0 \n",
+ " 32 Distrito Federal 54.3 53.8 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.2 \n",
+ " 1 61.0 \n",
+ " 2 61.2 \n",
+ " 3 47.0 \n",
+ " 4 54.1 \n",
+ " 5 53.2 \n",
+ " 6 51.2 \n",
+ " 7 48.4 \n",
+ " 8 61.4 \n",
+ " 9 52.5 \n",
+ " 10 70.6 \n",
+ " 11 56.1 \n",
+ " 12 58.8 \n",
+ " 13 56.3 \n",
+ " 14 55.7 \n",
+ " 15 57.4 \n",
+ " 16 67.7 \n",
+ " 17 66.1 \n",
+ " 18 63.2 \n",
+ " 19 60.0 \n",
+ " 20 75.8 \n",
+ " 21 63.9 \n",
+ " 22 55.4 \n",
+ " 23 52.4 \n",
+ " 24 60.3 \n",
+ " 25 38.8 \n",
+ " 26 49.9 \n",
+ " 27 47.4 \n",
+ " 28 63.0 \n",
+ " 29 62.6 \n",
+ " 30 43.8 \n",
+ " 31 58.2 \n",
+ " 32 47.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2019': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 44.9 53.3 \n",
+ " 1 Norte 46.8 60.9 \n",
+ " 2 Nordeste 45.1 61.3 \n",
+ " 3 Sudeste 42.6 45.2 \n",
+ " 4 Sul 50.1 55.0 \n",
+ " 5 Centro-Oeste 41.1 48.3 \n",
+ " 6 Rondônia 30.9 52.9 \n",
+ " 7 Acre 36.9 49.6 \n",
+ " 8 Amazonas 49.0 59.5 \n",
+ " 9 Roraima 24.8 43.4 \n",
+ " 10 Pará 55.1 69.7 \n",
+ " 11 Amapá 46.4 59.8 \n",
+ " 12 Tocantins 35.5 55.8 \n",
+ " 13 Maranhão 44.7 59.9 \n",
+ " 14 Piauí 45.6 62.2 \n",
+ " 15 Ceará 21.6 40.5 \n",
+ " 16 Rio Grande do Norte 37.6 64.8 \n",
+ " 17 Paraíba 53.8 70.6 \n",
+ " 18 Pernambuco 51.2 68.4 \n",
+ " 19 Alagoas 45.0 65.0 \n",
+ " 20 Sergipe 57.4 75.1 \n",
+ " 21 Bahia 53.5 69.5 \n",
+ " 22 Minas Gerais 30.7 45.6 \n",
+ " 23 Espírito Santo 30.8 53.4 \n",
+ " 24 Rio de Janeiro 60.4 65.9 \n",
+ " 25 São Paulo 43.9 37.5 \n",
+ " 26 Paraná 61.6 51.5 \n",
+ " 27 Santa Catarina 20.0 38.7 \n",
+ " 28 Rio Grande do Sul 49.1 64.2 \n",
+ " 29 Mato Grosso do Sul 65.0 66.2 \n",
+ " 30 Mato Grosso 27.5 33.9 \n",
+ " 31 Goiás 30.2 47.7 \n",
+ " 32 Distrito Federal 44.8 50.5 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 51.1 \n",
+ " 1 59.3 \n",
+ " 2 59.4 \n",
+ " 3 44.7 \n",
+ " 4 50.9 \n",
+ " 5 51.4 \n",
+ " 6 48.3 \n",
+ " 7 46.7 \n",
+ " 8 62.3 \n",
+ " 9 47.7 \n",
+ " 10 68.5 \n",
+ " 11 51.7 \n",
+ " 12 56.8 \n",
+ " 13 54.8 \n",
+ " 14 55.8 \n",
+ " 15 52.0 \n",
+ " 16 66.1 \n",
+ " 17 64.5 \n",
+ " 18 65.5 \n",
+ " 19 59.3 \n",
+ " 20 74.6 \n",
+ " 21 63.1 \n",
+ " 22 50.7 \n",
+ " 23 50.2 \n",
+ " 24 59.5 \n",
+ " 25 36.9 \n",
+ " 26 45.2 \n",
+ " 27 43.0 \n",
+ " 28 59.4 \n",
+ " 29 59.2 \n",
+ " 30 43.9 \n",
+ " 31 56.0 \n",
+ " 32 45.1 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2020': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 41.7 51.6 \n",
+ " 1 Norte 44.0 58.8 \n",
+ " 2 Nordeste 40.9 59.3 \n",
+ " 3 Sudeste 39.3 43.8 \n",
+ " 4 Sul 48.2 53.7 \n",
+ " 5 Centro-Oeste 38.1 45.4 \n",
+ " 6 Rondônia 29.2 47.8 \n",
+ " 7 Acre 34.9 47.4 \n",
+ " 8 Amazonas 45.9 57.8 \n",
+ " 9 Roraima 25.2 37.0 \n",
+ " 10 Pará 52.0 68.6 \n",
+ " 11 Amapá 41.1 59.7 \n",
+ " 12 Tocantins 32.2 52.9 \n",
+ " 13 Maranhão 41.2 58.6 \n",
+ " 14 Piauí 39.8 58.2 \n",
+ " 15 Ceará 18.4 37.1 \n",
+ " 16 Rio Grande do Norte 33.4 62.5 \n",
+ " 17 Paraíba 50.3 68.4 \n",
+ " 18 Pernambuco 43.5 65.2 \n",
+ " 19 Alagoas 39.7 61.2 \n",
+ " 20 Sergipe 53.9 74.8 \n",
+ " 21 Bahia 50.6 68.8 \n",
+ " 22 Minas Gerais 28.2 44.7 \n",
+ " 23 Espírito Santo 27.9 51.2 \n",
+ " 24 Rio de Janeiro 55.4 64.5 \n",
+ " 25 São Paulo 40.7 35.4 \n",
+ " 26 Paraná 61.5 49.7 \n",
+ " 27 Santa Catarina 18.7 37.4 \n",
+ " 28 Rio Grande do Sul 46.0 63.8 \n",
+ " 29 Mato Grosso do Sul 61.7 65.3 \n",
+ " 30 Mato Grosso 24.5 35.0 \n",
+ " 31 Goiás 29.2 42.5 \n",
+ " 32 Distrito Federal 39.9 49.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 50.3 \n",
+ " 1 58.5 \n",
+ " 2 58.5 \n",
+ " 3 44.2 \n",
+ " 4 48.8 \n",
+ " 5 49.4 \n",
+ " 6 49.1 \n",
+ " 7 46.7 \n",
+ " 8 58.6 \n",
+ " 9 48.3 \n",
+ " 10 67.6 \n",
+ " 11 52.2 \n",
+ " 12 54.8 \n",
+ " 13 52.4 \n",
+ " 14 55.8 \n",
+ " 15 49.2 \n",
+ " 16 62.7 \n",
+ " 17 62.1 \n",
+ " 18 63.7 \n",
+ " 19 58.8 \n",
+ " 20 71.3 \n",
+ " 21 64.9 \n",
+ " 22 49.6 \n",
+ " 23 49.4 \n",
+ " 24 60.4 \n",
+ " 25 35.7 \n",
+ " 26 44.3 \n",
+ " 27 40.0 \n",
+ " 28 58.0 \n",
+ " 29 59.3 \n",
+ " 30 42.7 \n",
+ " 31 51.9 \n",
+ " 32 44.3 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2021': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 36.6 49.5 \n",
+ " 1 Norte 36.8 57.7 \n",
+ " 2 Nordeste 34.2 56.9 \n",
+ " 3 Sudeste 36.2 41.1 \n",
+ " 4 Sul 43.1 51.1 \n",
+ " 5 Centro-Oeste 33.6 43.3 \n",
+ " 6 Rondônia 23.4 43.6 \n",
+ " 7 Acre 27.9 46.2 \n",
+ " 8 Amazonas 39.8 56.7 \n",
+ " 9 Roraima 20.5 35.8 \n",
+ " 10 Pará 43.4 68.1 \n",
+ " 11 Amapá 34.0 58.8 \n",
+ " 12 Tocantins 27.0 48.7 \n",
+ " 13 Maranhão 35.3 56.5 \n",
+ " 14 Piauí 33.2 56.3 \n",
+ " 15 Ceará 14.6 33.3 \n",
+ " 16 Rio Grande do Norte 23.6 60.1 \n",
+ " 17 Paraíba 42.4 65.7 \n",
+ " 18 Pernambuco 36.4 60.8 \n",
+ " 19 Alagoas 32.2 57.4 \n",
+ " 20 Sergipe 44.5 72.2 \n",
+ " 21 Bahia 42.9 66.6 \n",
+ " 22 Minas Gerais 25.0 40.4 \n",
+ " 23 Espírito Santo 20.4 47.6 \n",
+ " 24 Rio de Janeiro 49.3 63.4 \n",
+ " 25 São Paulo 38.8 32.6 \n",
+ " 26 Paraná 60.2 46.1 \n",
+ " 27 Santa Catarina 14.6 35.1 \n",
+ " 28 Rio Grande do Sul 37.6 61.6 \n",
+ " 29 Mato Grosso do Sul 56.0 63.4 \n",
+ " 30 Mato Grosso 21.1 32.3 \n",
+ " 31 Goiás 25.2 39.8 \n",
+ " 32 Distrito Federal 36.0 48.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 48.3 \n",
+ " 1 57.5 \n",
+ " 2 57.8 \n",
+ " 3 41.8 \n",
+ " 4 44.9 \n",
+ " 5 46.1 \n",
+ " 6 48.7 \n",
+ " 7 44.4 \n",
+ " 8 55.9 \n",
+ " 9 43.7 \n",
+ " 10 66.7 \n",
+ " 11 54.5 \n",
+ " 12 52.9 \n",
+ " 13 50.8 \n",
+ " 14 55.3 \n",
+ " 15 44.4 \n",
+ " 16 59.9 \n",
+ " 17 61.8 \n",
+ " 18 61.3 \n",
+ " 19 58.9 \n",
+ " 20 68.7 \n",
+ " 21 68.6 \n",
+ " 22 45.1 \n",
+ " 23 47.3 \n",
+ " 24 57.8 \n",
+ " 25 34.9 \n",
+ " 26 40.6 \n",
+ " 27 37.1 \n",
+ " 28 53.2 \n",
+ " 29 57.9 \n",
+ " 30 40.7 \n",
+ " 31 46.5 \n",
+ " 32 43.6 \n",
+ " 33 NaN \n",
+ " 34 NaN }"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: 2007\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2008\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2009\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2010\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2011\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2012\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2013\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2014\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2015\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2016\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2017\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2018\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2019\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2020\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2021\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframes = {}\n",
+ "\n",
+ "for table_name, columns in dfs.items():\n",
+ " df = pd.DataFrame(columns) # dfs values are already DataFrames (one per sheet); this re-wraps each one\n",
+ " dataframes[table_name] = df # Store the DataFrame in a dictionary\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS = {\n",
+ " 'Unnamed: 1':'id_uf',\n",
+ " 'Unnamed: 2':'nome',\n",
+ " 'Distorção Idade-Série': 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Distorção Idade-Série.1':'Ensino Fundamental – Anos Finais',\n",
+ " 'Distorção Idade-Série.2':'Ensino Médio Regular' \n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"Unnamed\")\n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'2007': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 72.2 60.6 \n",
+ " 1 76.1 69.7 \n",
+ " 2 74.4 64.7 \n",
+ " 3 69.3 56.5 \n",
+ " 4 73.3 59.6 \n",
+ " 5 78.7 73.6 \n",
+ " 6 72.8 65.8 \n",
+ " 7 67.2 60.0 \n",
+ " 8 76.3 75.7 \n",
+ " 9 71.0 69.5 \n",
+ " 10 78.2 68.4 \n",
+ " 11 76.4 81.0 \n",
+ " 12 75.4 70.7 \n",
+ " 13 74.2 61.2 \n",
+ " 14 83.0 69.3 \n",
+ " 15 67.8 56.9 \n",
+ " 16 66.4 74.0 \n",
+ " 17 73.3 71.6 \n",
+ " 18 78.6 64.5 \n",
+ " 19 72.0 79.2 \n",
+ " 20 85.0 85.4 \n",
+ " 21 76.7 66.5 \n",
+ " 22 79.0 69.1 \n",
+ " 23 67.5 69.0 \n",
+ " 24 85.1 77.2 \n",
+ " 25 60.7 50.2 \n",
+ " 26 72.3 54.2 \n",
+ " 27 75.8 65.3 \n",
+ " 28 73.6 66.1 \n",
+ " 29 86.0 81.2 \n",
+ " 30 76.3 66.2 \n",
+ " 31 76.8 75.7 \n",
+ " 32 74.6 70.6 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 65.3 \n",
+ " 1 75.7 \n",
+ " 2 77.9 \n",
+ " 3 58.4 \n",
+ " 4 62.6 \n",
+ " 5 73.6 \n",
+ " 6 76.2 \n",
+ " 7 60.5 \n",
+ " 8 75.5 \n",
+ " 9 76.9 \n",
+ " 10 77.7 \n",
+ " 11 69.4 \n",
+ " 12 75.9 \n",
+ " 13 76.2 \n",
+ " 14 80.5 \n",
+ " 15 83.7 \n",
+ " 16 78.0 \n",
+ " 17 88.0 \n",
+ " 18 81.3 \n",
+ " 19 88.3 \n",
+ " 20 85.7 \n",
+ " 21 67.1 \n",
+ " 22 69.1 \n",
+ " 23 69.1 \n",
+ " 24 86.7 \n",
+ " 25 51.5 \n",
+ " 26 59.5 \n",
+ " 27 71.9 \n",
+ " 28 60.4 \n",
+ " 29 75.4 \n",
+ " 30 78.1 \n",
+ " 31 75.5 \n",
+ " 32 69.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2008': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 66.5 54.5 \n",
+ " 1 61.2 44.8 \n",
+ " 2 60.7 45.7 \n",
+ " 3 65.7 53.5 \n",
+ " 4 73.5 63.2 \n",
+ " 5 72.3 63.6 \n",
+ " 6 55.1 35.9 \n",
+ " 7 43.8 29.7 \n",
+ " 8 58.0 57.0 \n",
+ " 9 44.2 28.4 \n",
+ " 10 65.1 48.3 \n",
+ " 11 57.0 54.4 \n",
+ " 12 67.1 46.3 \n",
+ " 13 62.8 44.3 \n",
+ " 14 66.3 57.9 \n",
+ " 15 55.9 38.6 \n",
+ " 16 47.1 45.2 \n",
+ " 17 54.0 48.1 \n",
+ " 18 73.5 58.6 \n",
+ " 19 49.0 46.1 \n",
+ " 20 68.5 58.9 \n",
+ " 21 60.5 44.4 \n",
+ " 22 73.1 61.8 \n",
+ " 23 62.9 61.2 \n",
+ " 24 82.7 72.7 \n",
+ " 25 57.5 48.2 \n",
+ " 26 78.0 60.6 \n",
+ " 27 69.5 64.5 \n",
+ " 28 70.1 65.0 \n",
+ " 29 84.6 75.7 \n",
+ " 30 72.6 66.2 \n",
+ " 31 68.0 62.1 \n",
+ " 32 62.0 60.0 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 57.3 \n",
+ " 1 50.7 \n",
+ " 2 49.4 \n",
+ " 3 56.6 \n",
+ " 4 65.0 \n",
+ " 5 60.5 \n",
+ " 6 48.8 \n",
+ " 7 47.2 \n",
+ " 8 62.2 \n",
+ " 9 41.2 \n",
+ " 10 48.7 \n",
+ " 11 60.0 \n",
+ " 12 48.2 \n",
+ " 13 45.3 \n",
+ " 14 51.0 \n",
+ " 15 48.7 \n",
+ " 16 44.2 \n",
+ " 17 52.3 \n",
+ " 18 65.1 \n",
+ " 19 61.1 \n",
+ " 20 65.7 \n",
+ " 21 41.9 \n",
+ " 22 58.6 \n",
+ " 23 73.5 \n",
+ " 24 80.2 \n",
+ " 25 51.5 \n",
+ " 26 59.6 \n",
+ " 27 72.5 \n",
+ " 28 64.0 \n",
+ " 29 80.0 \n",
+ " 30 72.7 \n",
+ " 31 55.8 \n",
+ " 32 60.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2009': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 68.4 54.8 \n",
+ " 1 69.8 51.0 \n",
+ " 2 66.9 50.7 \n",
+ " 3 65.6 52.1 \n",
+ " 4 75.8 63.8 \n",
+ " 5 73.1 65.6 \n",
+ " 6 64.2 47.3 \n",
+ " 7 51.6 37.8 \n",
+ " 8 70.6 56.7 \n",
+ " 9 50.6 51.7 \n",
+ " 10 74.2 54.0 \n",
+ " 11 64.5 49.2 \n",
+ " 12 72.8 50.5 \n",
+ " 13 64.4 48.8 \n",
+ " 14 70.6 47.2 \n",
+ " 15 64.3 44.5 \n",
+ " 16 53.4 57.9 \n",
+ " 17 70.4 47.9 \n",
+ " 18 78.3 64.5 \n",
+ " 19 54.1 58.9 \n",
+ " 20 78.2 61.1 \n",
+ " 21 65.5 50.2 \n",
+ " 22 75.4 64.4 \n",
+ " 23 69.9 60.2 \n",
+ " 24 84.6 74.3 \n",
+ " 25 57.0 47.3 \n",
+ " 26 80.6 61.1 \n",
+ " 27 57.4 61.8 \n",
+ " 28 75.0 67.9 \n",
+ " 29 84.1 78.9 \n",
+ " 30 73.2 69.7 \n",
+ " 31 65.7 61.2 \n",
+ " 32 66.4 64.6 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.0 \n",
+ " 1 57.5 \n",
+ " 2 56.6 \n",
+ " 3 52.1 \n",
+ " 4 62.1 \n",
+ " 5 62.2 \n",
+ " 6 45.1 \n",
+ " 7 43.4 \n",
+ " 8 61.4 \n",
+ " 9 21.4 \n",
+ " 10 62.1 \n",
+ " 11 54.7 \n",
+ " 12 55.2 \n",
+ " 13 48.2 \n",
+ " 14 61.4 \n",
+ " 15 52.6 \n",
+ " 16 51.1 \n",
+ " 17 50.2 \n",
+ " 18 69.3 \n",
+ " 19 64.1 \n",
+ " 20 68.9 \n",
+ " 21 54.4 \n",
+ " 22 59.9 \n",
+ " 23 69.9 \n",
+ " 24 74.8 \n",
+ " 25 46.0 \n",
+ " 26 57.4 \n",
+ " 27 57.4 \n",
+ " 28 70.3 \n",
+ " 29 80.6 \n",
+ " 30 70.9 \n",
+ " 31 56.2 \n",
+ " 32 67.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2010': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 67.5 57.2 \n",
+ " 1 70.0 59.4 \n",
+ " 2 67.1 57.8 \n",
+ " 3 65.0 52.8 \n",
+ " 4 72.4 62.8 \n",
+ " 5 68.6 67.2 \n",
+ " 6 67.5 60.7 \n",
+ " 7 57.2 49.4 \n",
+ " 8 70.7 64.2 \n",
+ " 9 50.6 59.2 \n",
+ " 10 74.9 62.5 \n",
+ " 11 66.7 54.7 \n",
+ " 12 68.6 57.5 \n",
+ " 13 65.2 55.5 \n",
+ " 14 69.2 56.1 \n",
+ " 15 64.2 50.7 \n",
+ " 16 56.2 66.8 \n",
+ " 17 71.7 56.5 \n",
+ " 18 74.9 65.6 \n",
+ " 19 57.6 60.2 \n",
+ " 20 78.2 72.2 \n",
+ " 21 67.4 59.3 \n",
+ " 22 69.3 66.1 \n",
+ " 23 59.1 59.0 \n",
+ " 24 82.2 73.3 \n",
+ " 25 58.4 47.4 \n",
+ " 26 76.0 59.0 \n",
+ " 27 54.1 62.1 \n",
+ " 28 73.0 67.8 \n",
+ " 29 81.2 78.7 \n",
+ " 30 64.6 66.7 \n",
+ " 31 62.2 66.2 \n",
+ " 32 62.7 61.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.4 \n",
+ " 1 63.0 \n",
+ " 2 60.3 \n",
+ " 3 52.1 \n",
+ " 4 55.6 \n",
+ " 5 64.6 \n",
+ " 6 45.9 \n",
+ " 7 56.6 \n",
+ " 8 70.9 \n",
+ " 9 58.6 \n",
+ " 10 70.8 \n",
+ " 11 54.1 \n",
+ " 12 56.0 \n",
+ " 13 56.7 \n",
+ " 14 65.9 \n",
+ " 15 50.6 \n",
+ " 16 67.4 \n",
+ " 17 55.6 \n",
+ " 18 69.9 \n",
+ " 19 69.1 \n",
+ " 20 80.1 \n",
+ " 21 57.6 \n",
+ " 22 58.9 \n",
+ " 23 60.1 \n",
+ " 24 67.7 \n",
+ " 25 47.3 \n",
+ " 26 51.1 \n",
+ " 27 53.0 \n",
+ " 28 62.9 \n",
+ " 29 75.6 \n",
+ " 30 69.6 \n",
+ " 31 63.2 \n",
+ " 32 61.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2011': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 66.5 58.5 \n",
+ " 1 69.1 63.2 \n",
+ " 2 66.9 61.9 \n",
+ " 3 64.7 53.6 \n",
+ " 4 69.1 60.6 \n",
+ " 5 66.1 66.2 \n",
+ " 6 67.5 61.4 \n",
+ " 7 57.1 53.4 \n",
+ " 8 69.7 69.5 \n",
+ " 9 49.4 65.6 \n",
+ " 10 73.4 66.5 \n",
+ " 11 69.8 55.4 \n",
+ " 12 67.2 62.8 \n",
+ " 13 64.8 60.2 \n",
+ " 14 67.1 61.0 \n",
+ " 15 62.1 56.8 \n",
+ " 16 57.6 68.9 \n",
+ " 17 71.2 60.8 \n",
+ " 18 74.3 67.4 \n",
+ " 19 60.4 64.5 \n",
+ " 20 78.4 73.3 \n",
+ " 21 67.7 61.6 \n",
+ " 22 65.1 65.3 \n",
+ " 23 54.4 58.0 \n",
+ " 24 79.6 72.8 \n",
+ " 25 60.4 47.5 \n",
+ " 26 73.5 56.1 \n",
+ " 27 50.3 60.1 \n",
+ " 28 69.8 66.4 \n",
+ " 29 78.4 79.3 \n",
+ " 30 58.8 62.5 \n",
+ " 31 60.5 65.1 \n",
+ " 32 63.0 61.0 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.1 \n",
+ " 1 62.9 \n",
+ " 2 59.9 \n",
+ " 3 52.5 \n",
+ " 4 54.3 \n",
+ " 5 62.5 \n",
+ " 6 51.0 \n",
+ " 7 51.2 \n",
+ " 8 68.9 \n",
+ " 9 48.1 \n",
+ " 10 69.1 \n",
+ " 11 57.7 \n",
+ " 12 62.9 \n",
+ " 13 54.6 \n",
+ " 14 63.0 \n",
+ " 15 57.7 \n",
+ " 16 68.8 \n",
+ " 17 50.5 \n",
+ " 18 65.9 \n",
+ " 19 62.8 \n",
+ " 20 73.5 \n",
+ " 21 57.1 \n",
+ " 22 59.8 \n",
+ " 23 59.1 \n",
+ " 24 66.3 \n",
+ " 25 47.4 \n",
+ " 26 50.7 \n",
+ " 27 51.3 \n",
+ " 28 61.2 \n",
+ " 29 68.9 \n",
+ " 30 64.6 \n",
+ " 31 63.8 \n",
+ " 32 57.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2012': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 64.4 58.9 \n",
+ " 1 67.0 64.7 \n",
+ " 2 66.2 63.7 \n",
+ " 3 62.0 53.1 \n",
+ " 4 67.1 61.4 \n",
+ " 5 62.5 64.3 \n",
+ " 6 67.0 60.8 \n",
+ " 7 54.5 58.5 \n",
+ " 8 66.0 72.1 \n",
+ " 9 43.5 62.5 \n",
+ " 10 70.2 67.7 \n",
+ " 11 68.3 61.9 \n",
+ " 12 68.3 62.9 \n",
+ " 13 62.6 60.3 \n",
+ " 14 68.5 61.3 \n",
+ " 15 57.8 59.4 \n",
+ " 16 57.6 69.9 \n",
+ " 17 69.3 61.9 \n",
+ " 18 74.2 69.6 \n",
+ " 19 61.7 66.2 \n",
+ " 20 79.5 75.4 \n",
+ " 21 68.0 64.0 \n",
+ " 22 57.8 63.3 \n",
+ " 23 50.4 59.4 \n",
+ " 24 78.8 71.8 \n",
+ " 25 59.6 46.6 \n",
+ " 26 73.8 56.9 \n",
+ " 27 41.5 59.9 \n",
+ " 28 67.0 67.6 \n",
+ " 29 77.2 78.6 \n",
+ " 30 47.1 57.6 \n",
+ " 31 56.6 63.0 \n",
+ " 32 62.6 59.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 55.3 \n",
+ " 1 62.9 \n",
+ " 2 60.9 \n",
+ " 3 51.4 \n",
+ " 4 51.8 \n",
+ " 5 61.5 \n",
+ " 6 53.9 \n",
+ " 7 52.0 \n",
+ " 8 66.0 \n",
+ " 9 57.5 \n",
+ " 10 70.9 \n",
+ " 11 52.5 \n",
+ " 12 61.0 \n",
+ " 13 57.8 \n",
+ " 14 58.3 \n",
+ " 15 59.0 \n",
+ " 16 67.7 \n",
+ " 17 52.1 \n",
+ " 18 67.1 \n",
+ " 19 63.5 \n",
+ " 20 73.7 \n",
+ " 21 59.8 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 60.5 \n",
+ " 25 46.6 \n",
+ " 26 46.9 \n",
+ " 27 49.9 \n",
+ " 28 59.4 \n",
+ " 29 69.8 \n",
+ " 30 57.5 \n",
+ " 31 64.7 \n",
+ " 32 53.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2013': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 61.8 59.3 \n",
+ " 1 65.1 65.2 \n",
+ " 2 63.8 66.2 \n",
+ " 3 59.5 52.2 \n",
+ " 4 63.0 63.8 \n",
+ " 5 60.1 61.6 \n",
+ " 6 66.3 62.2 \n",
+ " 7 55.8 56.4 \n",
+ " 8 63.2 66.0 \n",
+ " 9 38.4 58.2 \n",
+ " 10 69.1 69.9 \n",
+ " 11 64.4 60.6 \n",
+ " 12 63.3 66.0 \n",
+ " 13 60.5 61.9 \n",
+ " 14 66.0 66.3 \n",
+ " 15 50.2 63.1 \n",
+ " 16 56.4 72.8 \n",
+ " 17 68.0 67.2 \n",
+ " 18 73.1 71.1 \n",
+ " 19 61.3 65.7 \n",
+ " 20 76.4 77.6 \n",
+ " 21 66.8 65.8 \n",
+ " 22 52.4 60.5 \n",
+ " 23 47.8 61.8 \n",
+ " 24 77.1 74.3 \n",
+ " 25 58.4 44.8 \n",
+ " 26 70.7 59.6 \n",
+ " 27 35.1 59.7 \n",
+ " 28 62.8 70.3 \n",
+ " 29 76.4 75.2 \n",
+ " 30 41.9 47.7 \n",
+ " 31 53.5 64.7 \n",
+ " 32 63.5 58.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 55.4 \n",
+ " 1 62.0 \n",
+ " 2 61.9 \n",
+ " 3 51.0 \n",
+ " 4 52.9 \n",
+ " 5 61.4 \n",
+ " 6 54.1 \n",
+ " 7 48.6 \n",
+ " 8 63.9 \n",
+ " 9 60.3 \n",
+ " 10 70.6 \n",
+ " 11 54.4 \n",
+ " 12 60.4 \n",
+ " 13 57.8 \n",
+ " 14 63.2 \n",
+ " 15 60.9 \n",
+ " 16 65.5 \n",
+ " 17 56.9 \n",
+ " 18 71.0 \n",
+ " 19 55.5 \n",
+ " 20 72.9 \n",
+ " 21 60.0 \n",
+ " 22 61.1 \n",
+ " 23 51.7 \n",
+ " 24 63.1 \n",
+ " 25 46.1 \n",
+ " 26 48.2 \n",
+ " 27 51.8 \n",
+ " 28 59.4 \n",
+ " 29 67.8 \n",
+ " 30 60.5 \n",
+ " 31 65.1 \n",
+ " 32 53.9 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2014': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 58.7 59.6 \n",
+ " 1 60.7 65.6 \n",
+ " 2 60.4 68.1 \n",
+ " 3 56.4 51.9 \n",
+ " 4 61.2 64.5 \n",
+ " 5 56.0 58.6 \n",
+ " 6 57.7 63.7 \n",
+ " 7 53.6 55.5 \n",
+ " 8 61.6 67.2 \n",
+ " 9 35.5 54.8 \n",
+ " 10 64.8 72.0 \n",
+ " 11 60.4 60.3 \n",
+ " 12 56.9 65.2 \n",
+ " 13 59.0 63.6 \n",
+ " 14 62.1 68.2 \n",
+ " 15 42.5 63.9 \n",
+ " 16 52.5 74.8 \n",
+ " 17 65.0 69.4 \n",
+ " 18 71.2 72.7 \n",
+ " 19 60.1 69.4 \n",
+ " 20 71.9 81.3 \n",
+ " 21 64.4 67.9 \n",
+ " 22 46.0 56.6 \n",
+ " 23 45.6 63.0 \n",
+ " 24 75.3 73.7 \n",
+ " 25 56.1 44.9 \n",
+ " 26 73.2 61.2 \n",
+ " 27 30.0 57.5 \n",
+ " 28 58.9 70.9 \n",
+ " 29 75.1 73.4 \n",
+ " 30 35.0 37.8 \n",
+ " 31 46.7 62.0 \n",
+ " 32 62.9 61.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 54.5 \n",
+ " 1 58.9 \n",
+ " 2 64.7 \n",
+ " 3 49.3 \n",
+ " 4 52.4 \n",
+ " 5 61.0 \n",
+ " 6 52.9 \n",
+ " 7 49.4 \n",
+ " 8 67.0 \n",
+ " 9 64.4 \n",
+ " 10 63.8 \n",
+ " 11 57.8 \n",
+ " 12 60.8 \n",
+ " 13 61.1 \n",
+ " 14 63.6 \n",
+ " 15 62.2 \n",
+ " 16 69.0 \n",
+ " 17 60.3 \n",
+ " 18 68.6 \n",
+ " 19 61.3 \n",
+ " 20 74.1 \n",
+ " 21 67.8 \n",
+ " 22 60.5 \n",
+ " 23 53.9 \n",
+ " 24 63.1 \n",
+ " 25 43.5 \n",
+ " 26 48.4 \n",
+ " 27 50.4 \n",
+ " 28 59.0 \n",
+ " 29 66.4 \n",
+ " 30 58.7 \n",
+ " 31 64.4 \n",
+ " 32 54.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2015': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 55.8 59.2 \n",
+ " 1 57.8 66.0 \n",
+ " 2 57.2 67.6 \n",
+ " 3 53.8 51.1 \n",
+ " 4 57.6 63.6 \n",
+ " 5 53.3 56.2 \n",
+ " 6 53.0 62.4 \n",
+ " 7 50.5 54.1 \n",
+ " 8 59.2 65.1 \n",
+ " 9 32.1 54.7 \n",
+ " 10 62.8 73.7 \n",
+ " 11 55.7 61.7 \n",
+ " 12 53.1 64.9 \n",
+ " 13 55.3 63.7 \n",
+ " 14 60.1 67.5 \n",
+ " 15 36.3 60.4 \n",
+ " 16 49.0 74.2 \n",
+ " 17 62.9 70.4 \n",
+ " 18 69.3 73.1 \n",
+ " 19 58.1 69.9 \n",
+ " 20 68.2 81.0 \n",
+ " 21 62.7 68.6 \n",
+ " 22 41.7 53.1 \n",
+ " 23 43.0 62.6 \n",
+ " 24 72.9 72.3 \n",
+ " 25 54.5 44.4 \n",
+ " 26 69.7 61.2 \n",
+ " 27 26.5 54.1 \n",
+ " 28 56.0 70.5 \n",
+ " 29 73.1 69.4 \n",
+ " 30 30.8 34.0 \n",
+ " 31 42.7 60.6 \n",
+ " 32 62.1 59.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.7 \n",
+ " 1 58.4 \n",
+ " 2 64.4 \n",
+ " 3 48.3 \n",
+ " 4 52.0 \n",
+ " 5 58.3 \n",
+ " 6 53.3 \n",
+ " 7 49.6 \n",
+ " 8 68.1 \n",
+ " 9 54.5 \n",
+ " 10 65.6 \n",
+ " 11 55.5 \n",
+ " 12 58.9 \n",
+ " 13 61.0 \n",
+ " 14 65.6 \n",
+ " 15 62.6 \n",
+ " 16 69.1 \n",
+ " 17 60.4 \n",
+ " 18 66.0 \n",
+ " 19 57.8 \n",
+ " 20 73.7 \n",
+ " 21 67.6 \n",
+ " 22 60.9 \n",
+ " 23 54.2 \n",
+ " 24 62.0 \n",
+ " 25 41.7 \n",
+ " 26 48.0 \n",
+ " 27 50.1 \n",
+ " 28 58.0 \n",
+ " 29 67.2 \n",
+ " 30 52.3 \n",
+ " 31 61.6 \n",
+ " 32 52.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2016': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 53.5 58.2 \n",
+ " 1 55.7 65.6 \n",
+ " 2 54.3 66.9 \n",
+ " 3 51.4 49.7 \n",
+ " 4 56.9 61.2 \n",
+ " 5 51.0 55.3 \n",
+ " 6 47.2 62.7 \n",
+ " 7 46.8 55.5 \n",
+ " 8 57.7 63.3 \n",
+ " 9 28.9 52.9 \n",
+ " 10 61.3 73.2 \n",
+ " 11 54.4 62.0 \n",
+ " 12 52.5 62.7 \n",
+ " 13 53.1 64.2 \n",
+ " 14 57.3 67.5 \n",
+ " 15 30.9 55.6 \n",
+ " 16 45.4 72.9 \n",
+ " 17 60.3 71.4 \n",
+ " 18 65.8 74.0 \n",
+ " 19 56.1 70.7 \n",
+ " 20 66.1 79.4 \n",
+ " 21 61.4 69.9 \n",
+ " 22 39.3 50.9 \n",
+ " 23 40.7 59.8 \n",
+ " 24 70.1 70.2 \n",
+ " 25 51.9 43.3 \n",
+ " 26 69.5 59.3 \n",
+ " 27 24.5 47.9 \n",
+ " 28 54.9 68.4 \n",
+ " 29 71.9 67.3 \n",
+ " 30 30.0 31.1 \n",
+ " 31 40.0 60.1 \n",
+ " 32 59.7 59.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.9 \n",
+ " 1 60.4 \n",
+ " 2 64.6 \n",
+ " 3 47.8 \n",
+ " 4 53.6 \n",
+ " 5 57.3 \n",
+ " 6 54.4 \n",
+ " 7 51.1 \n",
+ " 8 67.3 \n",
+ " 9 51.1 \n",
+ " 10 69.7 \n",
+ " 11 55.5 \n",
+ " 12 60.9 \n",
+ " 13 58.7 \n",
+ " 14 63.5 \n",
+ " 15 64.1 \n",
+ " 16 68.3 \n",
+ " 17 65.5 \n",
+ " 18 65.4 \n",
+ " 19 64.6 \n",
+ " 20 72.0 \n",
+ " 21 68.0 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 58.3 \n",
+ " 25 41.2 \n",
+ " 26 49.1 \n",
+ " 27 50.7 \n",
+ " 28 61.0 \n",
+ " 29 68.0 \n",
+ " 30 49.8 \n",
+ " 31 61.6 \n",
+ " 32 50.8 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2017': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 51.4 56.8 \n",
+ " 1 53.1 64.8 \n",
+ " 2 52.2 65.3 \n",
+ " 3 49.4 48.3 \n",
+ " 4 54.6 59.2 \n",
+ " 5 48.2 52.5 \n",
+ " 6 42.7 60.5 \n",
+ " 7 45.1 54.0 \n",
+ " 8 54.5 62.9 \n",
+ " 9 26.2 50.3 \n",
+ " 10 59.9 72.7 \n",
+ " 11 53.8 60.4 \n",
+ " 12 44.1 61.6 \n",
+ " 13 50.5 63.3 \n",
+ " 14 54.4 63.7 \n",
+ " 15 28.4 50.8 \n",
+ " 16 43.2 69.6 \n",
+ " 17 59.1 71.3 \n",
+ " 18 62.6 73.2 \n",
+ " 19 55.1 69.8 \n",
+ " 20 62.3 78.4 \n",
+ " 21 59.6 70.4 \n",
+ " 22 36.8 49.4 \n",
+ " 23 37.9 58.2 \n",
+ " 24 68.0 67.8 \n",
+ " 25 50.8 41.2 \n",
+ " 26 66.0 58.0 \n",
+ " 27 23.0 44.4 \n",
+ " 28 53.4 66.9 \n",
+ " 29 71.5 68.3 \n",
+ " 30 29.3 33.3 \n",
+ " 31 37.3 54.6 \n",
+ " 32 56.8 56.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.1 \n",
+ " 1 59.9 \n",
+ " 2 61.8 \n",
+ " 3 46.7 \n",
+ " 4 54.9 \n",
+ " 5 55.5 \n",
+ " 6 52.2 \n",
+ " 7 48.1 \n",
+ " 8 64.1 \n",
+ " 9 51.2 \n",
+ " 10 69.8 \n",
+ " 11 54.6 \n",
+ " 12 59.5 \n",
+ " 13 56.2 \n",
+ " 14 59.9 \n",
+ " 15 60.8 \n",
+ " 16 67.9 \n",
+ " 17 66.9 \n",
+ " 18 65.2 \n",
+ " 19 52.6 \n",
+ " 20 75.2 \n",
+ " 21 65.7 \n",
+ " 22 57.4 \n",
+ " 23 51.5 \n",
+ " 24 58.6 \n",
+ " 25 39.6 \n",
+ " 26 51.2 \n",
+ " 27 50.5 \n",
+ " 28 62.0 \n",
+ " 29 65.6 \n",
+ " 30 46.4 \n",
+ " 31 60.5 \n",
+ " 32 50.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2018': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 48.4 55.0 \n",
+ " 1 49.9 62.6 \n",
+ " 2 48.9 63.4 \n",
+ " 3 46.6 46.8 \n",
+ " 4 52.5 57.0 \n",
+ " 5 44.1 49.8 \n",
+ " 6 36.6 58.4 \n",
+ " 7 41.4 51.5 \n",
+ " 8 51.6 60.7 \n",
+ " 9 25.2 45.0 \n",
+ " 10 57.7 70.7 \n",
+ " 11 51.3 60.2 \n",
+ " 12 38.7 57.7 \n",
+ " 13 47.7 61.2 \n",
+ " 14 50.9 62.2 \n",
+ " 15 24.6 46.0 \n",
+ " 16 41.0 67.5 \n",
+ " 17 57.0 70.7 \n",
+ " 18 57.0 70.8 \n",
+ " 19 50.1 68.2 \n",
+ " 20 60.8 76.4 \n",
+ " 21 56.8 69.9 \n",
+ " 22 34.0 48.1 \n",
+ " 23 34.5 55.4 \n",
+ " 24 64.7 66.6 \n",
+ " 25 48.3 38.8 \n",
+ " 26 63.0 55.2 \n",
+ " 27 21.7 41.4 \n",
+ " 28 51.9 65.4 \n",
+ " 29 67.4 67.6 \n",
+ " 30 27.9 34.6 \n",
+ " 31 32.9 50.0 \n",
+ " 32 54.3 53.8 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.2 \n",
+ " 1 61.0 \n",
+ " 2 61.2 \n",
+ " 3 47.0 \n",
+ " 4 54.1 \n",
+ " 5 53.2 \n",
+ " 6 51.2 \n",
+ " 7 48.4 \n",
+ " 8 61.4 \n",
+ " 9 52.5 \n",
+ " 10 70.6 \n",
+ " 11 56.1 \n",
+ " 12 58.8 \n",
+ " 13 56.3 \n",
+ " 14 55.7 \n",
+ " 15 57.4 \n",
+ " 16 67.7 \n",
+ " 17 66.1 \n",
+ " 18 63.2 \n",
+ " 19 60.0 \n",
+ " 20 75.8 \n",
+ " 21 63.9 \n",
+ " 22 55.4 \n",
+ " 23 52.4 \n",
+ " 24 60.3 \n",
+ " 25 38.8 \n",
+ " 26 49.9 \n",
+ " 27 47.4 \n",
+ " 28 63.0 \n",
+ " 29 62.6 \n",
+ " 30 43.8 \n",
+ " 31 58.2 \n",
+ " 32 47.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2019': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 44.9 53.3 \n",
+ " 1 46.8 60.9 \n",
+ " 2 45.1 61.3 \n",
+ " 3 42.6 45.2 \n",
+ " 4 50.1 55.0 \n",
+ " 5 41.1 48.3 \n",
+ " 6 30.9 52.9 \n",
+ " 7 36.9 49.6 \n",
+ " 8 49.0 59.5 \n",
+ " 9 24.8 43.4 \n",
+ " 10 55.1 69.7 \n",
+ " 11 46.4 59.8 \n",
+ " 12 35.5 55.8 \n",
+ " 13 44.7 59.9 \n",
+ " 14 45.6 62.2 \n",
+ " 15 21.6 40.5 \n",
+ " 16 37.6 64.8 \n",
+ " 17 53.8 70.6 \n",
+ " 18 51.2 68.4 \n",
+ " 19 45.0 65.0 \n",
+ " 20 57.4 75.1 \n",
+ " 21 53.5 69.5 \n",
+ " 22 30.7 45.6 \n",
+ " 23 30.8 53.4 \n",
+ " 24 60.4 65.9 \n",
+ " 25 43.9 37.5 \n",
+ " 26 61.6 51.5 \n",
+ " 27 20.0 38.7 \n",
+ " 28 49.1 64.2 \n",
+ " 29 65.0 66.2 \n",
+ " 30 27.5 33.9 \n",
+ " 31 30.2 47.7 \n",
+ " 32 44.8 50.5 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 51.1 \n",
+ " 1 59.3 \n",
+ " 2 59.4 \n",
+ " 3 44.7 \n",
+ " 4 50.9 \n",
+ " 5 51.4 \n",
+ " 6 48.3 \n",
+ " 7 46.7 \n",
+ " 8 62.3 \n",
+ " 9 47.7 \n",
+ " 10 68.5 \n",
+ " 11 51.7 \n",
+ " 12 56.8 \n",
+ " 13 54.8 \n",
+ " 14 55.8 \n",
+ " 15 52.0 \n",
+ " 16 66.1 \n",
+ " 17 64.5 \n",
+ " 18 65.5 \n",
+ " 19 59.3 \n",
+ " 20 74.6 \n",
+ " 21 63.1 \n",
+ " 22 50.7 \n",
+ " 23 50.2 \n",
+ " 24 59.5 \n",
+ " 25 36.9 \n",
+ " 26 45.2 \n",
+ " 27 43.0 \n",
+ " 28 59.4 \n",
+ " 29 59.2 \n",
+ " 30 43.9 \n",
+ " 31 56.0 \n",
+ " 32 45.1 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2020': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 41.7 51.6 \n",
+ " 1 44.0 58.8 \n",
+ " 2 40.9 59.3 \n",
+ " 3 39.3 43.8 \n",
+ " 4 48.2 53.7 \n",
+ " 5 38.1 45.4 \n",
+ " 6 29.2 47.8 \n",
+ " 7 34.9 47.4 \n",
+ " 8 45.9 57.8 \n",
+ " 9 25.2 37.0 \n",
+ " 10 52.0 68.6 \n",
+ " 11 41.1 59.7 \n",
+ " 12 32.2 52.9 \n",
+ " 13 41.2 58.6 \n",
+ " 14 39.8 58.2 \n",
+ " 15 18.4 37.1 \n",
+ " 16 33.4 62.5 \n",
+ " 17 50.3 68.4 \n",
+ " 18 43.5 65.2 \n",
+ " 19 39.7 61.2 \n",
+ " 20 53.9 74.8 \n",
+ " 21 50.6 68.8 \n",
+ " 22 28.2 44.7 \n",
+ " 23 27.9 51.2 \n",
+ " 24 55.4 64.5 \n",
+ " 25 40.7 35.4 \n",
+ " 26 61.5 49.7 \n",
+ " 27 18.7 37.4 \n",
+ " 28 46.0 63.8 \n",
+ " 29 61.7 65.3 \n",
+ " 30 24.5 35.0 \n",
+ " 31 29.2 42.5 \n",
+ " 32 39.9 49.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 50.3 \n",
+ " 1 58.5 \n",
+ " 2 58.5 \n",
+ " 3 44.2 \n",
+ " 4 48.8 \n",
+ " 5 49.4 \n",
+ " 6 49.1 \n",
+ " 7 46.7 \n",
+ " 8 58.6 \n",
+ " 9 48.3 \n",
+ " 10 67.6 \n",
+ " 11 52.2 \n",
+ " 12 54.8 \n",
+ " 13 52.4 \n",
+ " 14 55.8 \n",
+ " 15 49.2 \n",
+ " 16 62.7 \n",
+ " 17 62.1 \n",
+ " 18 63.7 \n",
+ " 19 58.8 \n",
+ " 20 71.3 \n",
+ " 21 64.9 \n",
+ " 22 49.6 \n",
+ " 23 49.4 \n",
+ " 24 60.4 \n",
+ " 25 35.7 \n",
+ " 26 44.3 \n",
+ " 27 40.0 \n",
+ " 28 58.0 \n",
+ " 29 59.3 \n",
+ " 30 42.7 \n",
+ " 31 51.9 \n",
+ " 32 44.3 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2021': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 36.6 49.5 \n",
+ " 1 36.8 57.7 \n",
+ " 2 34.2 56.9 \n",
+ " 3 36.2 41.1 \n",
+ " 4 43.1 51.1 \n",
+ " 5 33.6 43.3 \n",
+ " 6 23.4 43.6 \n",
+ " 7 27.9 46.2 \n",
+ " 8 39.8 56.7 \n",
+ " 9 20.5 35.8 \n",
+ " 10 43.4 68.1 \n",
+ " 11 34.0 58.8 \n",
+ " 12 27.0 48.7 \n",
+ " 13 35.3 56.5 \n",
+ " 14 33.2 56.3 \n",
+ " 15 14.6 33.3 \n",
+ " 16 23.6 60.1 \n",
+ " 17 42.4 65.7 \n",
+ " 18 36.4 60.8 \n",
+ " 19 32.2 57.4 \n",
+ " 20 44.5 72.2 \n",
+ " 21 42.9 66.6 \n",
+ " 22 25.0 40.4 \n",
+ " 23 20.4 47.6 \n",
+ " 24 49.3 63.4 \n",
+ " 25 38.8 32.6 \n",
+ " 26 60.2 46.1 \n",
+ " 27 14.6 35.1 \n",
+ " 28 37.6 61.6 \n",
+ " 29 56.0 63.4 \n",
+ " 30 21.1 32.3 \n",
+ " 31 25.2 39.8 \n",
+ " 32 36.0 48.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 48.3 \n",
+ " 1 57.5 \n",
+ " 2 57.8 \n",
+ " 3 41.8 \n",
+ " 4 44.9 \n",
+ " 5 46.1 \n",
+ " 6 48.7 \n",
+ " 7 44.4 \n",
+ " 8 55.9 \n",
+ " 9 43.7 \n",
+ " 10 66.7 \n",
+ " 11 54.5 \n",
+ " 12 52.9 \n",
+ " 13 50.8 \n",
+ " 14 55.3 \n",
+ " 15 44.4 \n",
+ " 16 59.9 \n",
+ " 17 61.8 \n",
+ " 18 61.3 \n",
+ " 19 58.9 \n",
+ " 20 68.7 \n",
+ " 21 68.6 \n",
+ " 22 45.1 \n",
+ " 23 47.3 \n",
+ " 24 57.8 \n",
+ " 25 34.9 \n",
+ " 26 40.6 \n",
+ " 27 37.1 \n",
+ " 28 53.2 \n",
+ " 29 57.9 \n",
+ " 30 40.7 \n",
+ " 31 46.5 \n",
+ " 32 43.6 \n",
+ " 33 NaN \n",
+ " 34 NaN }"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: 2007\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2008\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2009\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2010\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2011\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2012\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2013\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2014\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2015\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2016\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2017\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2018\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2019\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2020\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2021\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print every sheet's column labels to confirm all years share one layout.\n",
+ "for sheet_name, df in dfs.items():\n",
+ "    title = f\"Sheet: {sheet_name}\"\n",
+ "    print(title, df.columns, \"\", sep=\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For every sheet keep only the rows whose id_uf equals 0, reshape them to\n",
+ "# long format (one row per remaining column) and tag each row with the\n",
+ "# sheet key in a new `ano` column; the per-sheet pieces are then stacked.\n",
+ "id_columns = [\"id_uf\", \"nome\"]\n",
+ "melted_dataframe = pd.concat(\n",
+ "    [\n",
+ "        df.loc[df[\"id_uf\"] == 0]\n",
+ "        .melt(\n",
+ "            id_vars=id_columns,\n",
+ "            # Every column that is not an identifier becomes a melted value.\n",
+ "            value_vars=df.columns.difference(id_columns).tolist(),\n",
+ "            var_name=\"etapa_ensino\",\n",
+ "            value_name=\"tdi\",\n",
+ "        )\n",
+ "        .assign(ano=ano)\n",
+ "        for ano, df in dfs.items()\n",
+ "    ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_uf | \n",
+ " nome | \n",
+ " etapa_ensino | \n",
+ " tdi | \n",
+ " ano | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 60.6 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 72.2 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 65.3 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 54.5 | \n",
+ " 2008 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 66.5 | \n",
+ " 2008 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 57.3 | \n",
+ " 2008 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 54.8 | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 68.4 | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 56.0 | \n",
+ " 2009 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 57.2 | \n",
+ " 2010 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 67.5 | \n",
+ " 2010 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 56.4 | \n",
+ " 2010 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.5 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 66.5 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 56.1 | \n",
+ " 2011 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.9 | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 64.4 | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 55.3 | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.3 | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 61.8 | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 55.4 | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.6 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 58.7 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 54.5 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.2 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 55.8 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 53.7 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.2 | \n",
+ " 2016 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 53.5 | \n",
+ " 2016 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 53.9 | \n",
+ " 2016 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 56.8 | \n",
+ " 2017 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 51.4 | \n",
+ " 2017 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 53.1 | \n",
+ " 2017 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 55.0 | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 48.4 | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 53.2 | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 53.3 | \n",
+ " 2019 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 44.9 | \n",
+ " 2019 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 51.1 | \n",
+ " 2019 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 51.6 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 41.7 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 50.3 | \n",
+ " 2020 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 49.5 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 36.6 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " Brasil | \n",
+ " Ensino Médio Regular | \n",
+ " 48.3 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_uf nome etapa_ensino tdi ano\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 60.6 2007\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 72.2 2007\n",
+ "2 0 Brasil Ensino Médio Regular 65.3 2007\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 54.5 2008\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 66.5 2008\n",
+ "2 0 Brasil Ensino Médio Regular 57.3 2008\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 54.8 2009\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 68.4 2009\n",
+ "2 0 Brasil Ensino Médio Regular 56.0 2009\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 57.2 2010\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 67.5 2010\n",
+ "2 0 Brasil Ensino Médio Regular 56.4 2010\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 58.5 2011\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 66.5 2011\n",
+ "2 0 Brasil Ensino Médio Regular 56.1 2011\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 58.9 2012\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 64.4 2012\n",
+ "2 0 Brasil Ensino Médio Regular 55.3 2012\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 59.3 2013\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 61.8 2013\n",
+ "2 0 Brasil Ensino Médio Regular 55.4 2013\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 59.6 2014\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 58.7 2014\n",
+ "2 0 Brasil Ensino Médio Regular 54.5 2014\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 59.2 2015\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 55.8 2015\n",
+ "2 0 Brasil Ensino Médio Regular 53.7 2015\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 58.2 2016\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 53.5 2016\n",
+ "2 0 Brasil Ensino Médio Regular 53.9 2016\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 56.8 2017\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 51.4 2017\n",
+ "2 0 Brasil Ensino Médio Regular 53.1 2017\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 55.0 2018\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 48.4 2018\n",
+ "2 0 Brasil Ensino Médio Regular 53.2 2018\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 53.3 2019\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 44.9 2019\n",
+ "2 0 Brasil Ensino Médio Regular 51.1 2019\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 51.6 2020\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 41.7 2020\n",
+ "2 0 Brasil Ensino Médio Regular 50.3 2020\n",
+ "0 0 Brasil Ensino Fundamental – Anos Finais 49.5 2021\n",
+ "1 0 Brasil Ensino Fundamental – Anos Iniciais 36.6 2021\n",
+ "2 0 Brasil Ensino Médio Regular 48.3 2021"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31,\n",
+ " 32, 33, 35, 41, 42, 43, 50, 51, 52, 53], dtype=object)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe['id_uf'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe = melted_dataframe.drop(\n",
+ " columns=['id_uf', 'nome']\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe = melted_dataframe[\n",
+ " [\n",
+ " \"ano\",\n",
+ " \"etapa_ensino\",\n",
+ " \"tdi\",\n",
+ " ]\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " etapa_ensino | \n",
+ " tdi | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 60.6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 72.2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " Ensino Médio Regular | \n",
+ " 65.3 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2008 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 54.5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2008 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 66.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " Ensino Médio Regular | \n",
+ " 57.3 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2009 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 54.8 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2009 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 68.4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2009 | \n",
+ " Ensino Médio Regular | \n",
+ " 56.0 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2010 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 57.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2010 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 67.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2010 | \n",
+ " Ensino Médio Regular | \n",
+ " 56.4 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2011 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 66.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2011 | \n",
+ " Ensino Médio Regular | \n",
+ " 56.1 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.9 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2012 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 64.4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2012 | \n",
+ " Ensino Médio Regular | \n",
+ " 55.3 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2013 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 61.8 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013 | \n",
+ " Ensino Médio Regular | \n",
+ " 55.4 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2014 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2014 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 58.7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2014 | \n",
+ " Ensino Médio Regular | \n",
+ " 54.5 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2015 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 59.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2015 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 55.8 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2015 | \n",
+ " Ensino Médio Regular | \n",
+ " 53.7 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2016 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 58.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2016 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 53.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2016 | \n",
+ " Ensino Médio Regular | \n",
+ " 53.9 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2017 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 56.8 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2017 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 51.4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2017 | \n",
+ " Ensino Médio Regular | \n",
+ " 53.1 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2018 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 55.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2018 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 48.4 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2018 | \n",
+ " Ensino Médio Regular | \n",
+ " 53.2 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2019 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 53.3 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2019 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 44.9 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2019 | \n",
+ " Ensino Médio Regular | \n",
+ " 51.1 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2020 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 51.6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2020 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 41.7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2020 | \n",
+ " Ensino Médio Regular | \n",
+ " 50.3 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2021 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 49.5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2021 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 36.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2021 | \n",
+ " Ensino Médio Regular | \n",
+ " 48.3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano etapa_ensino tdi\n",
+ "0 2007 Ensino Fundamental – Anos Finais 60.6\n",
+ "1 2007 Ensino Fundamental – Anos Iniciais 72.2\n",
+ "2 2007 Ensino Médio Regular 65.3\n",
+ "0 2008 Ensino Fundamental – Anos Finais 54.5\n",
+ "1 2008 Ensino Fundamental – Anos Iniciais 66.5\n",
+ "2 2008 Ensino Médio Regular 57.3\n",
+ "0 2009 Ensino Fundamental – Anos Finais 54.8\n",
+ "1 2009 Ensino Fundamental – Anos Iniciais 68.4\n",
+ "2 2009 Ensino Médio Regular 56.0\n",
+ "0 2010 Ensino Fundamental – Anos Finais 57.2\n",
+ "1 2010 Ensino Fundamental – Anos Iniciais 67.5\n",
+ "2 2010 Ensino Médio Regular 56.4\n",
+ "0 2011 Ensino Fundamental – Anos Finais 58.5\n",
+ "1 2011 Ensino Fundamental – Anos Iniciais 66.5\n",
+ "2 2011 Ensino Médio Regular 56.1\n",
+ "0 2012 Ensino Fundamental – Anos Finais 58.9\n",
+ "1 2012 Ensino Fundamental – Anos Iniciais 64.4\n",
+ "2 2012 Ensino Médio Regular 55.3\n",
+ "0 2013 Ensino Fundamental – Anos Finais 59.3\n",
+ "1 2013 Ensino Fundamental – Anos Iniciais 61.8\n",
+ "2 2013 Ensino Médio Regular 55.4\n",
+ "0 2014 Ensino Fundamental – Anos Finais 59.6\n",
+ "1 2014 Ensino Fundamental – Anos Iniciais 58.7\n",
+ "2 2014 Ensino Médio Regular 54.5\n",
+ "0 2015 Ensino Fundamental – Anos Finais 59.2\n",
+ "1 2015 Ensino Fundamental – Anos Iniciais 55.8\n",
+ "2 2015 Ensino Médio Regular 53.7\n",
+ "0 2016 Ensino Fundamental – Anos Finais 58.2\n",
+ "1 2016 Ensino Fundamental – Anos Iniciais 53.5\n",
+ "2 2016 Ensino Médio Regular 53.9\n",
+ "0 2017 Ensino Fundamental – Anos Finais 56.8\n",
+ "1 2017 Ensino Fundamental – Anos Iniciais 51.4\n",
+ "2 2017 Ensino Médio Regular 53.1\n",
+ "0 2018 Ensino Fundamental – Anos Finais 55.0\n",
+ "1 2018 Ensino Fundamental – Anos Iniciais 48.4\n",
+ "2 2018 Ensino Médio Regular 53.2\n",
+ "0 2019 Ensino Fundamental – Anos Finais 53.3\n",
+ "1 2019 Ensino Fundamental – Anos Iniciais 44.9\n",
+ "2 2019 Ensino Médio Regular 51.1\n",
+ "0 2020 Ensino Fundamental – Anos Finais 51.6\n",
+ "1 2020 Ensino Fundamental – Anos Iniciais 41.7\n",
+ "2 2020 Ensino Médio Regular 50.3\n",
+ "0 2021 Ensino Fundamental – Anos Finais 49.5\n",
+ "1 2021 Ensino Fundamental – Anos Iniciais 36.6\n",
+ "2 2021 Ensino Médio Regular 48.3"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = os.path.join(\n",
+ " OUTPUT, \"educacao_especial_brasil_distorcao_idade_serie\"\n",
+ " )\n",
+ "\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "melted_dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb
new file mode 100644
index 00000000..ca3a594c
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb
@@ -0,0 +1,1661 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 8) -> pd.DataFrame:\n",
+ " return pd.read_excel(\n",
+ " os.path.join(\n",
+ " INPUT,\n",
+ " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n",
+ " ),\n",
+ " skiprows=skiprows,\n",
+ " sheet_name=sheet_name\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "excel_data = pd.ExcelFile(os.path.join(\n",
+ " INPUT,\n",
+ " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n",
+ " ))\n",
+ "\n",
+ "# Get the sheet names\n",
+ "sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = {\n",
+ " sheet_name: read_sheet(sheet_name)\n",
+ " for sheet_name in sheet_names\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'BRASIL_REGIOES_UFS ': NU_ANO_CENSO UNIDGEO \\\n",
+ " 0 2007 Brasil \n",
+ " 1 2007 Norte \n",
+ " 2 2007 Nordeste \n",
+ " 3 2007 Sudeste \n",
+ " 4 2007 Sul \n",
+ " .. ... ... \n",
+ " 492 2021 Mato Grosso \n",
+ " 493 2021 Goiás \n",
+ " 494 2021 Distrito Federal \n",
+ " 495 NaN NaN \n",
+ " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n",
+ " \n",
+ " NO_CATEGORIA NO_DEPENDENCIA 1_CAT_FUN 1_CAT_FUN_AI 1_CAT_FUN_AF \\\n",
+ " 0 Total Total 73.1 71.6 78.7 \n",
+ " 1 Total Total 63.2 61.4 75.5 \n",
+ " 2 Total Total 67.0 65.7 73.0 \n",
+ " 3 Total Total 77.0 75.6 81.5 \n",
+ " 4 Total Total 76.8 76.4 78.1 \n",
+ " .. ... ... ... ... ... \n",
+ " 492 Total Total 96.3 96.2 96.4 \n",
+ " 493 Total Total 96.5 95.2 97.6 \n",
+ " 494 Total Total 89.1 83.1 95.7 \n",
+ " 495 NaN NaN NaN NaN NaN \n",
+ " 496 NaN NaN NaN NaN NaN \n",
+ " \n",
+ " 1_CAT_MED 2_CAT_FUN 2_CAT_FUN_AI 2_CAT_FUN_AF 2_CAT_MED 3_CAT_FUN \\\n",
+ " 0 79.6 22.1 23.8 16.2 13.0 4.8 \n",
+ " 1 77.5 28.6 30.4 16.6 11.1 8.2 \n",
+ " 2 77.0 24.7 26.2 17.9 11.3 8.3 \n",
+ " 3 79.6 19.9 21.4 14.7 14.9 3.1 \n",
+ " 4 81.4 20.4 21.0 18.3 11.5 2.8 \n",
+ " .. ... ... ... ... ... ... \n",
+ " 492 81.9 3.1 3.4 2.7 13.4 0.6 \n",
+ " 493 97.8 2.8 4.2 1.6 1.3 0.7 \n",
+ " 494 92.2 10.6 16.4 4.1 7.3 0.3 \n",
+ " 495 NaN NaN NaN NaN NaN NaN \n",
+ " 496 NaN NaN NaN NaN NaN NaN \n",
+ " \n",
+ " 3_CAT_FUN_AI 3_CAT_FUN_AF 3_CAT_MED \n",
+ " 0 4.6 5.1 7.4 \n",
+ " 1 8.2 7.9 11.4 \n",
+ " 2 8.1 9.1 11.7 \n",
+ " 3 3.0 3.8 5.5 \n",
+ " 4 2.6 3.6 7.1 \n",
+ " .. ... ... ... \n",
+ " 492 0.4 0.9 4.7 \n",
+ " 493 0.6 0.8 0.9 \n",
+ " 494 0.5 0.2 0.5 \n",
+ " 495 NaN NaN NaN \n",
+ " 496 NaN NaN NaN \n",
+ " \n",
+ " [497 rows x 16 columns]}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: BRASIL_REGIOES_UFS \n",
+ "Index(['NU_ANO_CENSO', 'UNIDGEO', 'NO_CATEGORIA', 'NO_DEPENDENCIA',\n",
+ " '1_CAT_FUN', '1_CAT_FUN_AI', '1_CAT_FUN_AF', '1_CAT_MED', '2_CAT_FUN',\n",
+ " '2_CAT_FUN_AI', '2_CAT_FUN_AF', '2_CAT_MED', '3_CAT_FUN',\n",
+ " '3_CAT_FUN_AI', '3_CAT_FUN_AF', '3_CAT_MED'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS = {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " 'UNIDGEO':'nome_uf',\n",
+ " '1_CAT_FUN_AI':'taxaaprovacao_anosiniciais', \n",
+ " '1_CAT_FUN_AF':'taxaaprovacao_anosfinais', \n",
+ " '1_CAT_MED':'taxaaprovacao_ensinomedio', \n",
+ " '2_CAT_FUN_AI':'taxareprovacao_anosiniciais', \n",
+ " '2_CAT_FUN_AF':'taxareprovacao_anosfinais', \n",
+ " '2_CAT_MED':'taxareprovacao_ensinomedio', \n",
+ " '3_CAT_FUN_AI':'taxaabandono_anosiniciais', \n",
+ " '3_CAT_FUN_AF':'taxaabandono_anosfinais', \n",
+ " '3_CAT_MED' : 'taxaabandono_ensinomedio' \n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"NO_\") \n",
+ " or col.startswith(\"1_\") \n",
+ " or col.startswith(\"2_\") \n",
+ " or col.startswith(\"3_\")\n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'BRASIL_REGIOES_UFS ': ano nome_uf \\\n",
+ " 0 2007 Brasil \n",
+ " 1 2007 Norte \n",
+ " 2 2007 Nordeste \n",
+ " 3 2007 Sudeste \n",
+ " 4 2007 Sul \n",
+ " .. ... ... \n",
+ " 492 2021 Mato Grosso \n",
+ " 493 2021 Goiás \n",
+ " 494 2021 Distrito Federal \n",
+ " 495 NaN NaN \n",
+ " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n",
+ " \n",
+ " taxaaprovacao_anosiniciais taxaaprovacao_anosfinais \\\n",
+ " 0 71.6 78.7 \n",
+ " 1 61.4 75.5 \n",
+ " 2 65.7 73.0 \n",
+ " 3 75.6 81.5 \n",
+ " 4 76.4 78.1 \n",
+ " .. ... ... \n",
+ " 492 96.2 96.4 \n",
+ " 493 95.2 97.6 \n",
+ " 494 83.1 95.7 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaaprovacao_ensinomedio taxareprovacao_anosiniciais \\\n",
+ " 0 79.6 23.8 \n",
+ " 1 77.5 30.4 \n",
+ " 2 77.0 26.2 \n",
+ " 3 79.6 21.4 \n",
+ " 4 81.4 21.0 \n",
+ " .. ... ... \n",
+ " 492 81.9 3.4 \n",
+ " 493 97.8 4.2 \n",
+ " 494 92.2 16.4 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxareprovacao_anosfinais taxareprovacao_ensinomedio \\\n",
+ " 0 16.2 13.0 \n",
+ " 1 16.6 11.1 \n",
+ " 2 17.9 11.3 \n",
+ " 3 14.7 14.9 \n",
+ " 4 18.3 11.5 \n",
+ " .. ... ... \n",
+ " 492 2.7 13.4 \n",
+ " 493 1.6 1.3 \n",
+ " 494 4.1 7.3 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaabandono_anosiniciais taxaabandono_anosfinais \\\n",
+ " 0 4.6 5.1 \n",
+ " 1 8.2 7.9 \n",
+ " 2 8.1 9.1 \n",
+ " 3 3.0 3.8 \n",
+ " 4 2.6 3.6 \n",
+ " .. ... ... \n",
+ " 492 0.4 0.9 \n",
+ " 493 0.6 0.8 \n",
+ " 494 0.5 0.2 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaabandono_ensinomedio \n",
+ " 0 7.4 \n",
+ " 1 11.4 \n",
+ " 2 11.7 \n",
+ " 3 5.5 \n",
+ " 4 7.1 \n",
+ " .. ... \n",
+ " 492 4.7 \n",
+ " 493 0.9 \n",
+ " 494 0.5 \n",
+ " 495 NaN \n",
+ " 496 NaN \n",
+ " \n",
+ " [497 rows x 11 columns]}"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: BRASIL_REGIOES_UFS \n",
+ "Index(['ano', 'nome_uf', 'taxaaprovacao_anosiniciais',\n",
+ " 'taxaaprovacao_anosfinais', 'taxaaprovacao_ensinomedio',\n",
+ " 'taxareprovacao_anosiniciais', 'taxareprovacao_anosfinais',\n",
+ " 'taxareprovacao_ensinomedio', 'taxaabandono_anosiniciais',\n",
+ " 'taxaabandono_anosfinais', 'taxaabandono_ensinomedio'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe = pd.concat(\n",
+ " [\n",
+ " df.pipe(\n",
+ " lambda d: d.loc[(d[\"nome_uf\"] == 'Brasil' )]\n",
+ " )\n",
+ " .pipe(\n",
+ " lambda d: pd.melt(\n",
+ " d,\n",
+ " id_vars=[\"ano\", \"nome_uf\"],\n",
+ " value_vars=d.columns.difference([\"ano\", \"nome_uf\"]).tolist(), # Convert to list\n",
+ " var_name=\"metrica\",\n",
+ " value_name=\"valor\",\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " nome_uf | \n",
+ " metrica | \n",
+ " valor | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 5.1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2008 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2009 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 4.3 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2010 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 4.6 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2011 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 4.6 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " 2017 | \n",
+ " Brasil | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 11.2 | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " 2018 | \n",
+ " Brasil | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 11.4 | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " 2019 | \n",
+ " Brasil | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 9.4 | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " 2020 | \n",
+ " Brasil | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 2.2 | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " 2021 | \n",
+ " Brasil | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
135 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano nome_uf metrica valor\n",
+ "0 2007 Brasil taxaabandono_anosfinais 5.1\n",
+ "1 2008 Brasil taxaabandono_anosfinais 5.0\n",
+ "2 2009 Brasil taxaabandono_anosfinais 4.3\n",
+ "3 2010 Brasil taxaabandono_anosfinais 4.6\n",
+ "4 2011 Brasil taxaabandono_anosfinais 4.6\n",
+ ".. ... ... ... ...\n",
+ "130 2017 Brasil taxareprovacao_ensinomedio 11.2\n",
+ "131 2018 Brasil taxareprovacao_ensinomedio 11.4\n",
+ "132 2019 Brasil taxareprovacao_ensinomedio 9.4\n",
+ "133 2020 Brasil taxareprovacao_ensinomedio 2.2\n",
+ "134 2021 Brasil taxareprovacao_ensinomedio 3.8\n",
+ "\n",
+ "[135 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe['etapa_ensino'] = melted_dataframe['metrica'].apply(\n",
+ " lambda v: v.split('_')[-1]) # Extracts 'anosiniciais', 'anosfinais', or 'ensinomedio'\n",
+ "melted_dataframe['tipo_metrica'] = melted_dataframe['metrica'].apply(\n",
+ " lambda v: v.split('_')[0]) # Extracts 'taxaaprovacao', 'taxareprovacao', 'taxaabandono'\n",
+ "\n",
+ "# Pivoting the melted DataFrame to get desired columns\n",
+ "df_final = melted_dataframe.pivot_table(index=['ano', 'nome_uf', 'etapa_ensino'], \n",
+ " columns='tipo_metrica', \n",
+ " values='valor').reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS_MELTED = {\n",
+ " 'taxaabandono':'taxa_abandono', \n",
+ " 'taxaaprovacao':'taxa_aprovacao',\n",
+ " 'taxareprovacao':'taxa_reprovacao' \n",
+ "}\n",
+ "\n",
+ "etapa_ensino = {\n",
+ " 'anosiniciais': 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'anosfinais':'Ensino Fundamental – Anos Finais',\n",
+ " 'ensinomedio':'Ensino Médio Regular'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " tipo_metrica | \n",
+ " ano | \n",
+ " nome_uf | \n",
+ " etapa_ensino | \n",
+ " taxaabandono | \n",
+ " taxaaprovacao | \n",
+ " taxareprovacao | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 5.1 | \n",
+ " 78.7 | \n",
+ " 16.2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 4.6 | \n",
+ " 71.6 | \n",
+ " 23.8 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.4 | \n",
+ " 79.6 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2008 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 5.0 | \n",
+ " 78.8 | \n",
+ " 16.2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2008 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 4.5 | \n",
+ " 72.4 | \n",
+ " 23.1 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 2008 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.8 | \n",
+ " 79.3 | \n",
+ " 12.9 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 2009 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.3 | \n",
+ " 81.5 | \n",
+ " 14.2 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 2009 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 3.3 | \n",
+ " 74.4 | \n",
+ " 22.3 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 2009 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.0 | \n",
+ " 80.0 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 2010 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.6 | \n",
+ " 81.1 | \n",
+ " 14.3 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 2010 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 3.3 | \n",
+ " 73.7 | \n",
+ " 23.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 2010 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.4 | \n",
+ " 79.4 | \n",
+ " 13.2 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 2011 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.6 | \n",
+ " 80.4 | \n",
+ " 15.0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 2011 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 3.3 | \n",
+ " 74.5 | \n",
+ " 22.2 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 2011 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.1 | \n",
+ " 79.4 | \n",
+ " 13.5 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 2012 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.7 | \n",
+ " 80.2 | \n",
+ " 15.1 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 2012 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 3.3 | \n",
+ " 73.1 | \n",
+ " 23.6 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 2012 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 7.1 | \n",
+ " 80.1 | \n",
+ " 12.8 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 2013 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.4 | \n",
+ " 81.7 | \n",
+ " 13.9 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 2013 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.9 | \n",
+ " 75.2 | \n",
+ " 21.9 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 2013 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 6.8 | \n",
+ " 81.7 | \n",
+ " 11.5 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 2014 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.4 | \n",
+ " 80.9 | \n",
+ " 14.7 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 2014 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.9 | \n",
+ " 73.7 | \n",
+ " 23.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 2014 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 6.1 | \n",
+ " 82.0 | \n",
+ " 11.9 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 2015 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.3 | \n",
+ " 81.0 | \n",
+ " 14.7 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 2015 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.7 | \n",
+ " 74.5 | \n",
+ " 22.8 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 2015 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 6.2 | \n",
+ " 82.3 | \n",
+ " 11.5 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 2016 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.4 | \n",
+ " 80.5 | \n",
+ " 15.1 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 2016 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.7 | \n",
+ " 74.6 | \n",
+ " 22.7 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 2016 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 5.7 | \n",
+ " 82.5 | \n",
+ " 11.8 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 2017 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 4.1 | \n",
+ " 82.4 | \n",
+ " 13.5 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 2017 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.4 | \n",
+ " 76.7 | \n",
+ " 20.9 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 2017 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 6.0 | \n",
+ " 82.8 | \n",
+ " 11.2 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 2018 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 3.8 | \n",
+ " 83.2 | \n",
+ " 13.0 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 2018 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 2.2 | \n",
+ " 78.2 | \n",
+ " 19.6 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 2018 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 6.1 | \n",
+ " 82.5 | \n",
+ " 11.4 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 2019 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 2.9 | \n",
+ " 86.2 | \n",
+ " 10.9 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 2019 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 1.7 | \n",
+ " 81.4 | \n",
+ " 16.9 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 2019 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 4.5 | \n",
+ " 86.1 | \n",
+ " 9.4 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 2020 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 1.6 | \n",
+ " 97.0 | \n",
+ " 1.4 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 2020 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 1.2 | \n",
+ " 90.8 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 2020 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 2.6 | \n",
+ " 95.2 | \n",
+ " 2.2 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 2021 | \n",
+ " Brasil | \n",
+ " anosfinais | \n",
+ " 2.5 | \n",
+ " 94.1 | \n",
+ " 3.4 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 2021 | \n",
+ " Brasil | \n",
+ " anosiniciais | \n",
+ " 1.4 | \n",
+ " 87.9 | \n",
+ " 10.7 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 2021 | \n",
+ " Brasil | \n",
+ " ensinomedio | \n",
+ " 4.5 | \n",
+ " 91.7 | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "tipo_metrica ano nome_uf etapa_ensino taxaabandono taxaaprovacao \\\n",
+ "0 2007 Brasil anosfinais 5.1 78.7 \n",
+ "1 2007 Brasil anosiniciais 4.6 71.6 \n",
+ "2 2007 Brasil ensinomedio 7.4 79.6 \n",
+ "3 2008 Brasil anosfinais 5.0 78.8 \n",
+ "4 2008 Brasil anosiniciais 4.5 72.4 \n",
+ "5 2008 Brasil ensinomedio 7.8 79.3 \n",
+ "6 2009 Brasil anosfinais 4.3 81.5 \n",
+ "7 2009 Brasil anosiniciais 3.3 74.4 \n",
+ "8 2009 Brasil ensinomedio 7.0 80.0 \n",
+ "9 2010 Brasil anosfinais 4.6 81.1 \n",
+ "10 2010 Brasil anosiniciais 3.3 73.7 \n",
+ "11 2010 Brasil ensinomedio 7.4 79.4 \n",
+ "12 2011 Brasil anosfinais 4.6 80.4 \n",
+ "13 2011 Brasil anosiniciais 3.3 74.5 \n",
+ "14 2011 Brasil ensinomedio 7.1 79.4 \n",
+ "15 2012 Brasil anosfinais 4.7 80.2 \n",
+ "16 2012 Brasil anosiniciais 3.3 73.1 \n",
+ "17 2012 Brasil ensinomedio 7.1 80.1 \n",
+ "18 2013 Brasil anosfinais 4.4 81.7 \n",
+ "19 2013 Brasil anosiniciais 2.9 75.2 \n",
+ "20 2013 Brasil ensinomedio 6.8 81.7 \n",
+ "21 2014 Brasil anosfinais 4.4 80.9 \n",
+ "22 2014 Brasil anosiniciais 2.9 73.7 \n",
+ "23 2014 Brasil ensinomedio 6.1 82.0 \n",
+ "24 2015 Brasil anosfinais 4.3 81.0 \n",
+ "25 2015 Brasil anosiniciais 2.7 74.5 \n",
+ "26 2015 Brasil ensinomedio 6.2 82.3 \n",
+ "27 2016 Brasil anosfinais 4.4 80.5 \n",
+ "28 2016 Brasil anosiniciais 2.7 74.6 \n",
+ "29 2016 Brasil ensinomedio 5.7 82.5 \n",
+ "30 2017 Brasil anosfinais 4.1 82.4 \n",
+ "31 2017 Brasil anosiniciais 2.4 76.7 \n",
+ "32 2017 Brasil ensinomedio 6.0 82.8 \n",
+ "33 2018 Brasil anosfinais 3.8 83.2 \n",
+ "34 2018 Brasil anosiniciais 2.2 78.2 \n",
+ "35 2018 Brasil ensinomedio 6.1 82.5 \n",
+ "36 2019 Brasil anosfinais 2.9 86.2 \n",
+ "37 2019 Brasil anosiniciais 1.7 81.4 \n",
+ "38 2019 Brasil ensinomedio 4.5 86.1 \n",
+ "39 2020 Brasil anosfinais 1.6 97.0 \n",
+ "40 2020 Brasil anosiniciais 1.2 90.8 \n",
+ "41 2020 Brasil ensinomedio 2.6 95.2 \n",
+ "42 2021 Brasil anosfinais 2.5 94.1 \n",
+ "43 2021 Brasil anosiniciais 1.4 87.9 \n",
+ "44 2021 Brasil ensinomedio 4.5 91.7 \n",
+ "\n",
+ "tipo_metrica taxareprovacao \n",
+ "0 16.2 \n",
+ "1 23.8 \n",
+ "2 13.0 \n",
+ "3 16.2 \n",
+ "4 23.1 \n",
+ "5 12.9 \n",
+ "6 14.2 \n",
+ "7 22.3 \n",
+ "8 13.0 \n",
+ "9 14.3 \n",
+ "10 23.0 \n",
+ "11 13.2 \n",
+ "12 15.0 \n",
+ "13 22.2 \n",
+ "14 13.5 \n",
+ "15 15.1 \n",
+ "16 23.6 \n",
+ "17 12.8 \n",
+ "18 13.9 \n",
+ "19 21.9 \n",
+ "20 11.5 \n",
+ "21 14.7 \n",
+ "22 23.4 \n",
+ "23 11.9 \n",
+ "24 14.7 \n",
+ "25 22.8 \n",
+ "26 11.5 \n",
+ "27 15.1 \n",
+ "28 22.7 \n",
+ "29 11.8 \n",
+ "30 13.5 \n",
+ "31 20.9 \n",
+ "32 11.2 \n",
+ "33 13.0 \n",
+ "34 19.6 \n",
+ "35 11.4 \n",
+ "36 10.9 \n",
+ "37 16.9 \n",
+ "38 9.4 \n",
+ "39 1.4 \n",
+ "40 8.0 \n",
+ "41 2.2 \n",
+ "42 3.4 \n",
+ "43 10.7 \n",
+ "44 3.8 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_final"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_final = df_final.rename(columns=RENAME_COLUMNS_MELTED)\n",
+ "df_final['etapa_ensino'] = df_final['etapa_ensino'].replace(etapa_ensino)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+    "# Drop the 'nome_uf' column; it is not among the columns kept for the output\n",
+ "df_final = df_final.drop(columns=['nome_uf'])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_final = df_final[['ano', 'etapa_ensino', 'taxa_aprovacao','taxa_reprovacao','taxa_abandono']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " tipo_metrica | \n",
+ " ano | \n",
+ " etapa_ensino | \n",
+ " taxa_aprovacao | \n",
+ " taxa_reprovacao | \n",
+ " taxa_abandono | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 78.7 | \n",
+ " 16.2 | \n",
+ " 5.1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 71.6 | \n",
+ " 23.8 | \n",
+ " 4.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " Ensino Médio Regular | \n",
+ " 79.6 | \n",
+ " 13.0 | \n",
+ " 7.4 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2008 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 78.8 | \n",
+ " 16.2 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2008 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 72.4 | \n",
+ " 23.1 | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 2008 | \n",
+ " Ensino Médio Regular | \n",
+ " 79.3 | \n",
+ " 12.9 | \n",
+ " 7.8 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 2009 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 81.5 | \n",
+ " 14.2 | \n",
+ " 4.3 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 2009 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 74.4 | \n",
+ " 22.3 | \n",
+ " 3.3 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 2009 | \n",
+ " Ensino Médio Regular | \n",
+ " 80.0 | \n",
+ " 13.0 | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 2010 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 81.1 | \n",
+ " 14.3 | \n",
+ " 4.6 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 2010 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 73.7 | \n",
+ " 23.0 | \n",
+ " 3.3 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 2010 | \n",
+ " Ensino Médio Regular | \n",
+ " 79.4 | \n",
+ " 13.2 | \n",
+ " 7.4 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 2011 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 80.4 | \n",
+ " 15.0 | \n",
+ " 4.6 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 2011 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 74.5 | \n",
+ " 22.2 | \n",
+ " 3.3 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 2011 | \n",
+ " Ensino Médio Regular | \n",
+ " 79.4 | \n",
+ " 13.5 | \n",
+ " 7.1 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 2012 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 80.2 | \n",
+ " 15.1 | \n",
+ " 4.7 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 2012 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 73.1 | \n",
+ " 23.6 | \n",
+ " 3.3 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 2012 | \n",
+ " Ensino Médio Regular | \n",
+ " 80.1 | \n",
+ " 12.8 | \n",
+ " 7.1 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 2013 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 81.7 | \n",
+ " 13.9 | \n",
+ " 4.4 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 2013 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 75.2 | \n",
+ " 21.9 | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 2013 | \n",
+ " Ensino Médio Regular | \n",
+ " 81.7 | \n",
+ " 11.5 | \n",
+ " 6.8 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 2014 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 80.9 | \n",
+ " 14.7 | \n",
+ " 4.4 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 2014 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 73.7 | \n",
+ " 23.4 | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 2014 | \n",
+ " Ensino Médio Regular | \n",
+ " 82.0 | \n",
+ " 11.9 | \n",
+ " 6.1 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 2015 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 81.0 | \n",
+ " 14.7 | \n",
+ " 4.3 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 2015 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 74.5 | \n",
+ " 22.8 | \n",
+ " 2.7 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 2015 | \n",
+ " Ensino Médio Regular | \n",
+ " 82.3 | \n",
+ " 11.5 | \n",
+ " 6.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 2016 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 80.5 | \n",
+ " 15.1 | \n",
+ " 4.4 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 2016 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 74.6 | \n",
+ " 22.7 | \n",
+ " 2.7 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 2016 | \n",
+ " Ensino Médio Regular | \n",
+ " 82.5 | \n",
+ " 11.8 | \n",
+ " 5.7 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 2017 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 82.4 | \n",
+ " 13.5 | \n",
+ " 4.1 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 2017 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 76.7 | \n",
+ " 20.9 | \n",
+ " 2.4 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 2017 | \n",
+ " Ensino Médio Regular | \n",
+ " 82.8 | \n",
+ " 11.2 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 2018 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 83.2 | \n",
+ " 13.0 | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 2018 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 78.2 | \n",
+ " 19.6 | \n",
+ " 2.2 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 2018 | \n",
+ " Ensino Médio Regular | \n",
+ " 82.5 | \n",
+ " 11.4 | \n",
+ " 6.1 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 2019 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 86.2 | \n",
+ " 10.9 | \n",
+ " 2.9 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 2019 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 81.4 | \n",
+ " 16.9 | \n",
+ " 1.7 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 2019 | \n",
+ " Ensino Médio Regular | \n",
+ " 86.1 | \n",
+ " 9.4 | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 2020 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 97.0 | \n",
+ " 1.4 | \n",
+ " 1.6 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 2020 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 90.8 | \n",
+ " 8.0 | \n",
+ " 1.2 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 2020 | \n",
+ " Ensino Médio Regular | \n",
+ " 95.2 | \n",
+ " 2.2 | \n",
+ " 2.6 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 2021 | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 94.1 | \n",
+ " 3.4 | \n",
+ " 2.5 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 2021 | \n",
+ " Ensino Fundamental – Anos Iniciais | \n",
+ " 87.9 | \n",
+ " 10.7 | \n",
+ " 1.4 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 2021 | \n",
+ " Ensino Médio Regular | \n",
+ " 91.7 | \n",
+ " 3.8 | \n",
+ " 4.5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "tipo_metrica ano etapa_ensino taxa_aprovacao \\\n",
+ "0 2007 Ensino Fundamental – Anos Finais 78.7 \n",
+ "1 2007 Ensino Fundamental – Anos Iniciais 71.6 \n",
+ "2 2007 Ensino Médio Regular 79.6 \n",
+ "3 2008 Ensino Fundamental – Anos Finais 78.8 \n",
+ "4 2008 Ensino Fundamental – Anos Iniciais 72.4 \n",
+ "5 2008 Ensino Médio Regular 79.3 \n",
+ "6 2009 Ensino Fundamental – Anos Finais 81.5 \n",
+ "7 2009 Ensino Fundamental – Anos Iniciais 74.4 \n",
+ "8 2009 Ensino Médio Regular 80.0 \n",
+ "9 2010 Ensino Fundamental – Anos Finais 81.1 \n",
+ "10 2010 Ensino Fundamental – Anos Iniciais 73.7 \n",
+ "11 2010 Ensino Médio Regular 79.4 \n",
+ "12 2011 Ensino Fundamental – Anos Finais 80.4 \n",
+ "13 2011 Ensino Fundamental – Anos Iniciais 74.5 \n",
+ "14 2011 Ensino Médio Regular 79.4 \n",
+ "15 2012 Ensino Fundamental – Anos Finais 80.2 \n",
+ "16 2012 Ensino Fundamental – Anos Iniciais 73.1 \n",
+ "17 2012 Ensino Médio Regular 80.1 \n",
+ "18 2013 Ensino Fundamental – Anos Finais 81.7 \n",
+ "19 2013 Ensino Fundamental – Anos Iniciais 75.2 \n",
+ "20 2013 Ensino Médio Regular 81.7 \n",
+ "21 2014 Ensino Fundamental – Anos Finais 80.9 \n",
+ "22 2014 Ensino Fundamental – Anos Iniciais 73.7 \n",
+ "23 2014 Ensino Médio Regular 82.0 \n",
+ "24 2015 Ensino Fundamental – Anos Finais 81.0 \n",
+ "25 2015 Ensino Fundamental – Anos Iniciais 74.5 \n",
+ "26 2015 Ensino Médio Regular 82.3 \n",
+ "27 2016 Ensino Fundamental – Anos Finais 80.5 \n",
+ "28 2016 Ensino Fundamental – Anos Iniciais 74.6 \n",
+ "29 2016 Ensino Médio Regular 82.5 \n",
+ "30 2017 Ensino Fundamental – Anos Finais 82.4 \n",
+ "31 2017 Ensino Fundamental – Anos Iniciais 76.7 \n",
+ "32 2017 Ensino Médio Regular 82.8 \n",
+ "33 2018 Ensino Fundamental – Anos Finais 83.2 \n",
+ "34 2018 Ensino Fundamental – Anos Iniciais 78.2 \n",
+ "35 2018 Ensino Médio Regular 82.5 \n",
+ "36 2019 Ensino Fundamental – Anos Finais 86.2 \n",
+ "37 2019 Ensino Fundamental – Anos Iniciais 81.4 \n",
+ "38 2019 Ensino Médio Regular 86.1 \n",
+ "39 2020 Ensino Fundamental – Anos Finais 97.0 \n",
+ "40 2020 Ensino Fundamental – Anos Iniciais 90.8 \n",
+ "41 2020 Ensino Médio Regular 95.2 \n",
+ "42 2021 Ensino Fundamental – Anos Finais 94.1 \n",
+ "43 2021 Ensino Fundamental – Anos Iniciais 87.9 \n",
+ "44 2021 Ensino Médio Regular 91.7 \n",
+ "\n",
+ "tipo_metrica taxa_reprovacao taxa_abandono \n",
+ "0 16.2 5.1 \n",
+ "1 23.8 4.6 \n",
+ "2 13.0 7.4 \n",
+ "3 16.2 5.0 \n",
+ "4 23.1 4.5 \n",
+ "5 12.9 7.8 \n",
+ "6 14.2 4.3 \n",
+ "7 22.3 3.3 \n",
+ "8 13.0 7.0 \n",
+ "9 14.3 4.6 \n",
+ "10 23.0 3.3 \n",
+ "11 13.2 7.4 \n",
+ "12 15.0 4.6 \n",
+ "13 22.2 3.3 \n",
+ "14 13.5 7.1 \n",
+ "15 15.1 4.7 \n",
+ "16 23.6 3.3 \n",
+ "17 12.8 7.1 \n",
+ "18 13.9 4.4 \n",
+ "19 21.9 2.9 \n",
+ "20 11.5 6.8 \n",
+ "21 14.7 4.4 \n",
+ "22 23.4 2.9 \n",
+ "23 11.9 6.1 \n",
+ "24 14.7 4.3 \n",
+ "25 22.8 2.7 \n",
+ "26 11.5 6.2 \n",
+ "27 15.1 4.4 \n",
+ "28 22.7 2.7 \n",
+ "29 11.8 5.7 \n",
+ "30 13.5 4.1 \n",
+ "31 20.9 2.4 \n",
+ "32 11.2 6.0 \n",
+ "33 13.0 3.8 \n",
+ "34 19.6 2.2 \n",
+ "35 11.4 6.1 \n",
+ "36 10.9 2.9 \n",
+ "37 16.9 1.7 \n",
+ "38 9.4 4.5 \n",
+ "39 1.4 1.6 \n",
+ "40 8.0 1.2 \n",
+ "41 2.2 2.6 \n",
+ "42 3.4 2.5 \n",
+ "43 10.7 1.4 \n",
+ "44 3.8 4.5 "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_final"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = os.path.join(\n",
+ " OUTPUT, \"educacao_especial_brasil_taxa_rendimento\"\n",
+ " )\n",
+ "\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "df_final.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb
new file mode 100644
index 00000000..a25c0d30
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb
@@ -0,0 +1,630 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 2) -> pd.DataFrame:\n",
+ " return pd.read_excel(\n",
+ " os.path.join(\n",
+ " INPUT,\n",
+ " \"2020_2022_INDIC_ED_ESP.xlsx\"\n",
+ " ),\n",
+ " skiprows=skiprows,\n",
+ " sheet_name=sheet_name,\n",
+ " dtype=str\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sheets_etapa_ensino_serie = {\n",
+ " \"INDICADOR 2\":\"regente_aee\",\n",
+ " \"INDICADOR 3\":\"aee\",\n",
+ " \"INDICADOR 4\":\"regente_formacaocontinuada\",\n",
+ " \"INDICADOR 5\":\"aee_formacaocontinuada\", \n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = {\n",
+ " name: read_sheet(sheet_name)\n",
+ " for sheet_name, name in sheets_etapa_ensino_serie.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframes = {}\n",
+ "\n",
+ "for table_name, columns in dfs.items():\n",
+ " df = pd.DataFrame(columns) # Create DataFrame for each table\n",
+ " dataframes[table_name] = df # Store the DataFrame in a dictionary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['NU_ANO_CENSO', 'CO_REGIAO', 'NO_REGIAO', 'CO_UF', 'NO_UF', 'SIGLA',\n",
+ " 'NO_MUNICIPIO', 'CO_MUNICIPIO', 'DOCENTES'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataframes['aee'].columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAMES_COLUMNS = {\n",
+ " \"regente_aee\": {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " #'CO_REGIAO',\n",
+ " #'NO_REGIAO', \n",
+ " #'CO_UF', \n",
+ " #'NO_UF', \n",
+ " 'SIGLA':\"sigla_uf\",\n",
+ " #'NO_MUNICIPIO', \n",
+ " 'CO_MUNICIPIO':\"id_municipio\", \n",
+ " 'DOCENTES':'quantidade_docente_regente_aee'\n",
+ " },\n",
+ " \"aee\": {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " #'CO_REGIAO',\n",
+ " #'NO_REGIAO', \n",
+ " #'CO_UF', \n",
+ " #'NO_UF', \n",
+ " 'SIGLA':\"sigla_uf\",\n",
+ " #'NO_MUNICIPIO', \n",
+ " 'CO_MUNICIPIO':\"id_municipio\", \n",
+ " 'DOCENTES':'quantidade_docente_aee'\n",
+ " },\n",
+ " \"regente_formacaocontinuada\": {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " #'CO_REGIAO',\n",
+ " #'NO_REGIAO', \n",
+ " #'CO_UF', \n",
+ " #'NO_UF', \n",
+ " 'SIGLA':\"sigla_uf\",\n",
+ " #'NO_MUNICIPIO', \n",
+ " 'CO_MUNICIPIO':\"id_municipio\", \n",
+ " 'DOCENTES':'quantidade_docente_regente_formacao_continuada'\n",
+ " },\n",
+ " \"aee_formacaocontinuada\": {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " #'CO_REGIAO',\n",
+ " #'NO_REGIAO', \n",
+ " #'CO_UF', \n",
+ " #'NO_UF', \n",
+ " 'SIGLA':\"sigla_uf\",\n",
+ " #'NO_MUNICIPIO', \n",
+ " 'CO_MUNICIPIO':\"id_municipio\", \n",
+ " 'DOCENTES':'quantidade_docente_aee_formacao_continuada'\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"CO_\")\n",
+ " or col.startswith(\"NO_\")\n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAMES_COLUMNS[name], errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: regente_aee\n",
+ "Index(['ano', 'sigla_uf', 'id_municipio', 'quantidade_docente_regente_aee'], dtype='object')\n",
+ "\n",
+ "Sheet: aee\n",
+ "Index(['ano', 'sigla_uf', 'id_municipio', 'quantidade_docente_aee'], dtype='object')\n",
+ "\n",
+ "Sheet: regente_formacaocontinuada\n",
+ "Index(['ano', 'sigla_uf', 'id_municipio',\n",
+ " 'quantidade_docente_regente_formacao_continuada'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: aee_formacaocontinuada\n",
+ "Index(['ano', 'sigla_uf', 'id_municipio',\n",
+ " 'quantidade_docente_aee_formacao_continuada'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def remove_nulls(df: pd.DataFrame, name: str) -> pd.DataFrame:\n",
+    "    # Keep only rows that have a usable municipality code in 'id_municipio'\n",
+ " df = df.dropna(subset=['id_municipio']) # Remove rows where 'id_municipio' is NaN\n",
+ " df = df.loc[df['id_municipio'].astype(str) != \" \"] # Filter out rows where 'id_municipio' is empty space\n",
+ " return df\n",
+ "\n",
+ "# Apply remove_nulls to each DataFrame in the dictionary\n",
+ "dfs = {\n",
+ " name: remove_nulls(df, name) # This keeps the original key (sheet name) with the cleaned DataFrame\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'regente_aee': ano sigla_uf id_municipio quantidade_docente_regente_aee\n",
+ " 3 2020 RO 1100015 291\n",
+ " 4 2020 RO 1100379 128\n",
+ " 5 2020 RO 1100403 135\n",
+ " 6 2020 RO 1100346 189\n",
+ " 7 2020 RO 1100023 1018\n",
+ " ... ... ... ... ...\n",
+ " 16803 2022 GO 5222302 78\n",
+ " 16804 2022 GO 5200175 58\n",
+ " 16805 2022 GO 5200209 31\n",
+ " 16806 2022 GO 5200258 1818\n",
+ " 16808 2022 DF 5300108 30821\n",
+ " \n",
+ " [16710 rows x 4 columns],\n",
+ " 'aee': ano sigla_uf id_municipio quantidade_docente_aee\n",
+ " 3 2020 RO 1100015 10\n",
+ " 4 2020 RO 1100379 3\n",
+ " 5 2020 RO 1100403 4\n",
+ " 6 2020 RO 1100346 11\n",
+ " 7 2020 RO 1100023 55\n",
+ " ... ... ... ... ...\n",
+ " 13851 2022 GO 5222203 3\n",
+ " 13852 2022 GO 5222302 3\n",
+ " 13853 2022 GO 5200209 2\n",
+ " 13854 2022 GO 5200258 58\n",
+ " 13856 2022 DF 5300108 673\n",
+ " \n",
+ " [13758 rows x 4 columns],\n",
+ " 'regente_formacaocontinuada': ano sigla_uf id_municipio \\\n",
+ " 3 2020 RO 1100015 \n",
+ " 4 2020 RO 1100379 \n",
+ " 5 2020 RO 1100403 \n",
+ " 6 2020 RO 1100346 \n",
+ " 7 2020 RO 1100023 \n",
+ " ... ... ... ... \n",
+ " 15133 2022 GO 5222302 \n",
+ " 15134 2022 GO 5200175 \n",
+ " 15135 2022 GO 5200209 \n",
+ " 15136 2022 GO 5200258 \n",
+ " 15138 2022 DF 5300108 \n",
+ " \n",
+ " quantidade_docente_regente_formacao_continuada \n",
+ " 3 14 \n",
+ " 4 1 \n",
+ " 5 2 \n",
+ " 6 9 \n",
+ " 7 34 \n",
+ " ... ... \n",
+ " 15133 2 \n",
+ " 15134 3 \n",
+ " 15135 6 \n",
+ " 15136 310 \n",
+ " 15138 4051 \n",
+ " \n",
+ " [15040 rows x 4 columns],\n",
+ " 'aee_formacaocontinuada': ano sigla_uf id_municipio quantidade_docente_aee_formacao_continuada\n",
+ " 3 2020 RO 1100015 5\n",
+ " 4 2020 RO 1100346 1\n",
+ " 5 2020 RO 1100023 15\n",
+ " 6 2020 RO 1100452 7\n",
+ " 7 2020 RO 1100031 3\n",
+ " ... ... ... ... ...\n",
+ " 9899 2022 GO 5221858 19\n",
+ " 9900 2022 GO 5222203 1\n",
+ " 9901 2022 GO 5200209 1\n",
+ " 9902 2022 GO 5200258 39\n",
+ " 9904 2022 DF 5300108 471\n",
+ " \n",
+ " [9806 rows x 4 columns]}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+    "# 'dfs' maps each indicator name to its cleaned DataFrame (four tables)\n",
+ "df_regente_aee = pd.DataFrame(dfs['regente_aee'])\n",
+ "df_aee = pd.DataFrame(dfs['aee'])\n",
+ "df_regente_formacaocontinuada = pd.DataFrame(dfs['regente_formacaocontinuada']) # Example third table\n",
+ "df_aee_formacaocontinuada = pd.DataFrame(dfs['aee_formacaocontinuada']) # Example fourth table\n",
+ "\n",
+ "# Merge all four DataFrames with outer join on 'ano', 'sigla_uf', 'id_municipio'\n",
+ "merged_df = df_regente_aee.merge(df_aee, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n",
+ "merged_df = merged_df.merge(df_regente_formacaocontinuada, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n",
+ "merged_df = merged_df.merge(df_aee_formacaocontinuada, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_df['quantidade_docente_regente_aee'] = pd.to_numeric(merged_df['quantidade_docente_regente_aee'], errors='coerce')\n",
+ "merged_df['quantidade_docente_aee'] = pd.to_numeric(merged_df['quantidade_docente_aee'], errors='coerce')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_df['quantidade_docente_regente'] = merged_df['quantidade_docente_regente_aee'] - merged_df['quantidade_docente_aee']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_df = merged_df.drop(columns='quantidade_docente_regente_aee', axis='')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_df = merged_df[[\n",
+ " 'ano', \n",
+ " 'sigla_uf', \n",
+ " 'id_municipio',\n",
+ " 'quantidade_docente_regente',\n",
+ " 'quantidade_docente_aee',\n",
+ " 'quantidade_docente_regente_formacao_continuada',\n",
+ " 'quantidade_docente_aee_formacao_continuada']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " sigla_uf | \n",
+ " id_municipio | \n",
+ " quantidade_docente_regente | \n",
+ " quantidade_docente_aee | \n",
+ " quantidade_docente_regente_formacao_continuada | \n",
+ " quantidade_docente_aee_formacao_continuada | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2020 | \n",
+ " RO | \n",
+ " 1100015 | \n",
+ " 281.0 | \n",
+ " 10.0 | \n",
+ " 14 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2020 | \n",
+ " RO | \n",
+ " 1100379 | \n",
+ " 125.0 | \n",
+ " 3.0 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2020 | \n",
+ " RO | \n",
+ " 1100403 | \n",
+ " 131.0 | \n",
+ " 4.0 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2020 | \n",
+ " RO | \n",
+ " 1100346 | \n",
+ " 178.0 | \n",
+ " 11.0 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2020 | \n",
+ " RO | \n",
+ " 1100023 | \n",
+ " 963.0 | \n",
+ " 55.0 | \n",
+ " 34 | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 16705 | \n",
+ " 2022 | \n",
+ " GO | \n",
+ " 5222302 | \n",
+ " 75.0 | \n",
+ " 3.0 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 16706 | \n",
+ " 2022 | \n",
+ " GO | \n",
+ " 5200175 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 3 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 16707 | \n",
+ " 2022 | \n",
+ " GO | \n",
+ " 5200209 | \n",
+ " 29.0 | \n",
+ " 2.0 | \n",
+ " 6 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16708 | \n",
+ " 2022 | \n",
+ " GO | \n",
+ " 5200258 | \n",
+ " 1760.0 | \n",
+ " 58.0 | \n",
+ " 310 | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " 16709 | \n",
+ " 2022 | \n",
+ " DF | \n",
+ " 5300108 | \n",
+ " 30148.0 | \n",
+ " 673.0 | \n",
+ " 4051 | \n",
+ " 471 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
16710 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano sigla_uf id_municipio quantidade_docente_regente \\\n",
+ "0 2020 RO 1100015 281.0 \n",
+ "1 2020 RO 1100379 125.0 \n",
+ "2 2020 RO 1100403 131.0 \n",
+ "3 2020 RO 1100346 178.0 \n",
+ "4 2020 RO 1100023 963.0 \n",
+ "... ... ... ... ... \n",
+ "16705 2022 GO 5222302 75.0 \n",
+ "16706 2022 GO 5200175 NaN \n",
+ "16707 2022 GO 5200209 29.0 \n",
+ "16708 2022 GO 5200258 1760.0 \n",
+ "16709 2022 DF 5300108 30148.0 \n",
+ "\n",
+ " quantidade_docente_aee quantidade_docente_regente_formacao_continuada \\\n",
+ "0 10.0 14 \n",
+ "1 3.0 1 \n",
+ "2 4.0 2 \n",
+ "3 11.0 9 \n",
+ "4 55.0 34 \n",
+ "... ... ... \n",
+ "16705 3.0 2 \n",
+ "16706 NaN 3 \n",
+ "16707 2.0 6 \n",
+ "16708 58.0 310 \n",
+ "16709 673.0 4051 \n",
+ "\n",
+ " quantidade_docente_aee_formacao_continuada \n",
+ "0 5 \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 1 \n",
+ "4 15 \n",
+ "... ... \n",
+ "16705 NaN \n",
+ "16706 NaN \n",
+ "16707 1 \n",
+ "16708 39 \n",
+ "16709 471 \n",
+ "\n",
+ "[16710 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "merged_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = os.path.join(\n",
+ " OUTPUT, \"educacao_especial_aee_docente\"\n",
+ " )\n",
+ "\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "merged_df.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb
new file mode 100644
index 00000000..6a47fc0b
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb
@@ -0,0 +1,460 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 9) -> pd.DataFrame:\n",
+ " return pd.read_excel(\n",
+ " os.path.join(\n",
+ " INPUT,\n",
+ " \"Demanda_23546-049990_2024_06_DOC_EDU_ESPECIAL_BAS__2012_A_2023.xlsx\"\n",
+ " ),\n",
+ " skiprows=skiprows,\n",
+ " sheet_name=sheet_name,\n",
+ " dtype=str\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "excel_data = pd.ExcelFile(os.path.join(\n",
+ " INPUT,\n",
+ " \"Demanda_23546-049990_2024_06_DOC_EDU_ESPECIAL_BAS__2012_A_2023.xlsx\"\n",
+ " ))\n",
+ "\n",
+ "# Get the sheet names\n",
+ "sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = {\n",
+ " sheet_name: read_sheet(sheet_name)\n",
+ " for sheet_name in sheet_names\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Planilha1': NU_ANO_CENSO CO_REGIAO NO_REGIAO \\\n",
+ " 0 2012 NaN Brasil \n",
+ " 1 2012 1 Norte \n",
+ " 2 2012 1 Norte \n",
+ " 3 2012 1 Norte \n",
+ " 4 2012 1 Norte \n",
+ " ... ... ... ... \n",
+ " 57988 NaN NaN NaN \n",
+ " 57989 Fonte: INEP – Censo Escolar da Educação Básica NaN NaN \n",
+ " 57990 Notas: 1 - Os docentes referem-se aos indivíd... NaN NaN \n",
+ " 57991 2 - Os docentes são contados u... NaN NaN \n",
+ " 57992 3 - Não inclui auxiliares da E... NaN NaN \n",
+ " \n",
+ " CO_UF SG_UF NO_UF CO_MUNICIPIO NO_MUNICIPIO DOCEE DOCFED \\\n",
+ " 0 NaN NaN NaN NaN NaN 88244 255 \n",
+ " 1 NaN NaN NaN NaN NaN 5954 41 \n",
+ " 2 11 RO Rondônia NaN NaN 593 8 \n",
+ " 3 11 RO Rondônia 1100015 Alta Floresta D'Oeste 12 0 \n",
+ " 4 11 RO Rondônia 1100403 Alto Paraíso 2 0 \n",
+ " ... ... ... ... ... ... ... ... \n",
+ " 57988 NaN NaN NaN NaN NaN NaN NaN \n",
+ " 57989 NaN NaN NaN NaN NaN NaN NaN \n",
+ " 57990 NaN NaN NaN NaN NaN NaN NaN \n",
+ " 57991 NaN NaN NaN NaN NaN NaN NaN \n",
+ " 57992 NaN NaN NaN NaN NaN NaN NaN \n",
+ " \n",
+ " DOCEST DOCMUNI DOCPRIV \n",
+ " 0 23762 52541 21635 \n",
+ " 1 2328 3490 739 \n",
+ " 2 244 243 161 \n",
+ " 3 2 9 1 \n",
+ " 4 2 0 0 \n",
+ " ... ... ... ... \n",
+ " 57988 NaN NaN NaN \n",
+ " 57989 NaN NaN NaN \n",
+ " 57990 NaN NaN NaN \n",
+ " 57991 NaN NaN NaN \n",
+ " 57992 NaN NaN NaN \n",
+ " \n",
+ " [57993 rows x 13 columns]}"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: Planilha1\n",
+ "Index(['NU_ANO_CENSO', 'CO_REGIAO', 'NO_REGIAO', 'CO_UF', 'SG_UF', 'NO_UF',\n",
+ " 'CO_MUNICIPIO', 'NO_MUNICIPIO', 'DOCEE', 'DOCFED', 'DOCEST', 'DOCMUNI',\n",
+ " 'DOCPRIV'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS = {\n",
+ " 'NU_ANO_CENSO':'ano', \n",
+ " 'SG_UF':'sigla_uf', \n",
+ " 'CO_MUNICIPIO':'id_municipio', \n",
+ " 'DOCFED':'Federal', \n",
+ " 'DOCEST':'Estadual', \n",
+ " 'DOCMUNI':'Municipal',\n",
+ " 'DOCPRIV':'Privada' \n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"NO_\") \n",
+ " or col.startswith(\"CO_\")\n",
+ " or col.startswith('DOCEE') \n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: Planilha1\n",
+ "Index(['ano', 'sigla_uf', 'id_municipio', 'Federal', 'Estadual', 'Municipal',\n",
+ " 'Privada'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe = pd.concat(\n",
+ " [\n",
+ " df.pipe(\n",
+ " lambda d: d.loc[(d[\"id_municipio\"].notna()) & (d[\"id_municipio\"] != \" \")]\n",
+ " )\n",
+ " .pipe(\n",
+ " lambda d: pd.melt(\n",
+ " d,\n",
+ " id_vars=[\"ano\", \"sigla_uf\", 'id_municipio'],\n",
+ " value_vars=d.columns.difference([\"id_uf\", \"nome\"]).tolist(), # Convert to list\n",
+ " var_name=\"rede\",\n",
+ " value_name=\"quantidade_docente_formacao_continuada\",\n",
+ " )\n",
+ " )\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "melted_dataframe = melted_dataframe.sort_values(\n",
+ " by=['ano', 'sigla_uf','id_municipio', 'rede'], \n",
+ " ascending=[True, True, True, True])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " sigla_uf | \n",
+ " id_municipio | \n",
+ " rede | \n",
+ " quantidade_docente_formacao_continuada | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 45 | \n",
+ " 2012 | \n",
+ " AC | \n",
+ " 1200013 | \n",
+ " Estadual | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 57638 | \n",
+ " 2012 | \n",
+ " AC | \n",
+ " 1200013 | \n",
+ " Federal | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 115231 | \n",
+ " 2012 | \n",
+ " AC | \n",
+ " 1200013 | \n",
+ " Municipal | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " 172824 | \n",
+ " 2012 | \n",
+ " AC | \n",
+ " 1200013 | \n",
+ " Privada | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 2012 | \n",
+ " AC | \n",
+ " 1200104 | \n",
+ " Estadual | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 225613 | \n",
+ " 2023 | \n",
+ " TO | \n",
+ " 1721307 | \n",
+ " Privada | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 52835 | \n",
+ " 2023 | \n",
+ " TO | \n",
+ " 1722107 | \n",
+ " Estadual | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 110428 | \n",
+ " 2023 | \n",
+ " TO | \n",
+ " 1722107 | \n",
+ " Federal | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 168021 | \n",
+ " 2023 | \n",
+ " TO | \n",
+ " 1722107 | \n",
+ " Municipal | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 225614 | \n",
+ " 2023 | \n",
+ " TO | \n",
+ " 1722107 | \n",
+ " Privada | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
230372 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano sigla_uf id_municipio rede \\\n",
+ "45 2012 AC 1200013 Estadual \n",
+ "57638 2012 AC 1200013 Federal \n",
+ "115231 2012 AC 1200013 Municipal \n",
+ "172824 2012 AC 1200013 Privada \n",
+ "46 2012 AC 1200104 Estadual \n",
+ "... ... ... ... ... \n",
+ "225613 2023 TO 1721307 Privada \n",
+ "52835 2023 TO 1722107 Estadual \n",
+ "110428 2023 TO 1722107 Federal \n",
+ "168021 2023 TO 1722107 Municipal \n",
+ "225614 2023 TO 1722107 Privada \n",
+ "\n",
+ " quantidade_docente_formacao_continuada \n",
+ "45 2 \n",
+ "57638 0 \n",
+ "115231 12 \n",
+ "172824 0 \n",
+ "46 3 \n",
+ "... ... \n",
+ "225613 0 \n",
+ "52835 0 \n",
+ "110428 0 \n",
+ "168021 1 \n",
+ "225614 0 \n",
+ "\n",
+ "[230372 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = os.path.join(\n",
+ " OUTPUT, \"educacao_especial_formacao_docente\"\n",
+ " )\n",
+ "\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "melted_dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb
new file mode 100644
index 00000000..34abfecc
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb
@@ -0,0 +1,458 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 7) -> pd.DataFrame:\n",
+ " return pd.read_excel(\n",
+ " os.path.join(\n",
+ " INPUT,\n",
+ " \"mat_ed_especial_aee_uf_rede_2019_2022.xlsx\"\n",
+ " ),\n",
+ " skiprows=skiprows,\n",
+ " sheet_name=sheet_name\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "excel_data = pd.ExcelFile(os.path.join(\n",
+ " INPUT,\n",
+ " \"mat_ed_especial_aee_uf_rede_2019_2022.xlsx\"\n",
+ " ))\n",
+ "\n",
+ "# Get the sheet names\n",
+ "sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = {\n",
+ " sheet_name: read_sheet(sheet_name)\n",
+ " for sheet_name in sheet_names\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'tabmat': NU_ANO_CENSO NO_REGIAO CO_REGIAO \\\n",
+ " 0 2022 Brasil NaN \n",
+ " 1 2022 Brasil NaN \n",
+ " 2 2022 Brasil NaN \n",
+ " 3 2022 Norte 1.0 \n",
+ " 4 2022 Norte 1.0 \n",
+ " .. ... ... ... \n",
+ " 333 2019 Centro-Oeste 5.0 \n",
+ " 334 2019 Centro-Oeste 5.0 \n",
+ " 335 2019 Centro-Oeste 5.0 \n",
+ " 336 NaN NaN NaN \n",
+ " 337 Fonte: Censo Escolar da Educação Básica/Inep. NaN NaN \n",
+ " \n",
+ " NO_UF SG_UF CO_UF rede MatEsp MatEspAee \n",
+ " 0 NaN NaN NaN NaN 1527794.0 568200.0 \n",
+ " 1 NaN NaN NaN Pública 1301961.0 525868.0 \n",
+ " 2 NaN NaN NaN Privada 225833.0 42332.0 \n",
+ " 3 Rondônia RO 11.0 NaN 14341.0 8328.0 \n",
+ " 4 Rondônia RO 11.0 Pública 12803.0 6965.0 \n",
+ " .. ... ... ... ... ... ... \n",
+ " 333 Distrito Federal DF 53.0 NaN 16580.0 8967.0 \n",
+ " 334 Distrito Federal DF 53.0 Pública 14362.0 8702.0 \n",
+ " 335 Distrito Federal DF 53.0 Privada 2218.0 265.0 \n",
+ " 336 NaN NaN NaN NaN NaN NaN \n",
+ " 337 NaN NaN NaN NaN NaN NaN \n",
+ " \n",
+ " [338 rows x 9 columns]}"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: tabmat\n",
+ "Index(['NU_ANO_CENSO', 'NO_REGIAO', 'CO_REGIAO', 'NO_UF', 'SG_UF', 'CO_UF',\n",
+ " 'rede', 'MatEsp', 'MatEspAee'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS = {\n",
+ " 'NU_ANO_CENSO':'ano',\n",
+ " 'SG_UF':'sigla_uf', \n",
+ " 'MatEsp':'quantidade_matricula', \n",
+ " 'MatEspAee':'quantidade_matricula_aee'}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"NO_\") \n",
+ " or col.startswith(\"CO\") \n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'tabmat': ano sigla_uf rede \\\n",
+ " 0 2022 NaN NaN \n",
+ " 1 2022 NaN Pública \n",
+ " 2 2022 NaN Privada \n",
+ " 3 2022 RO NaN \n",
+ " 4 2022 RO Pública \n",
+ " .. ... ... ... \n",
+ " 333 2019 DF NaN \n",
+ " 334 2019 DF Pública \n",
+ " 335 2019 DF Privada \n",
+ " 336 NaN NaN NaN \n",
+ " 337 Fonte: Censo Escolar da Educação Básica/Inep. NaN NaN \n",
+ " \n",
+ " quantidade_matricula quantidade_matricula_aee \n",
+ " 0 1527794.0 568200.0 \n",
+ " 1 1301961.0 525868.0 \n",
+ " 2 225833.0 42332.0 \n",
+ " 3 14341.0 8328.0 \n",
+ " 4 12803.0 6965.0 \n",
+ " .. ... ... \n",
+ " 333 16580.0 8967.0 \n",
+ " 334 14362.0 8702.0 \n",
+ " 335 2218.0 265.0 \n",
+ " 336 NaN NaN \n",
+ " 337 NaN NaN \n",
+ " \n",
+ " [338 rows x 5 columns]}"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: tabmat\n",
+ "Index(['ano', 'sigla_uf', 'rede', 'quantidade_matricula',\n",
+ " 'quantidade_matricula_aee'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframe = pd.DataFrame(dfs['tabmat']).dropna(subset=['sigla_uf', 'rede']).loc[lambda df: (df['sigla_uf'] != \" \") & (df['rede'] != \" \")]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframe[['quantidade_matricula','quantidade_matricula_aee']] = dataframe[['quantidade_matricula','quantidade_matricula_aee']].astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframe = dataframe.sort_values(by=['ano', 'sigla_uf'], ascending=[True, True])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " sigla_uf | \n",
+ " rede | \n",
+ " quantidade_matricula | \n",
+ " quantidade_matricula_aee | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 259 | \n",
+ " 2019 | \n",
+ " AC | \n",
+ " Pública | \n",
+ " 10782 | \n",
+ " 6395 | \n",
+ "
\n",
+ " \n",
+ " 260 | \n",
+ " 2019 | \n",
+ " AC | \n",
+ " Privada | \n",
+ " 325 | \n",
+ " 261 | \n",
+ "
\n",
+ " \n",
+ " 295 | \n",
+ " 2019 | \n",
+ " AL | \n",
+ " Pública | \n",
+ " 21838 | \n",
+ " 10145 | \n",
+ "
\n",
+ " \n",
+ " 296 | \n",
+ " 2019 | \n",
+ " AL | \n",
+ " Privada | \n",
+ " 1314 | \n",
+ " 1832 | \n",
+ "
\n",
+ " \n",
+ " 262 | \n",
+ " 2019 | \n",
+ " AM | \n",
+ " Pública | \n",
+ " 17216 | \n",
+ " 5074 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 2022 | \n",
+ " SE | \n",
+ " Privada | \n",
+ " 1604 | \n",
+ " 227 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " 2022 | \n",
+ " SP | \n",
+ " Pública | \n",
+ " 191708 | \n",
+ " 68563 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " 2022 | \n",
+ " SP | \n",
+ " Privada | \n",
+ " 52146 | \n",
+ " 942 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 2022 | \n",
+ " TO | \n",
+ " Pública | \n",
+ " 15977 | \n",
+ " 7222 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 2022 | \n",
+ " TO | \n",
+ " Privada | \n",
+ " 567 | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
216 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano sigla_uf rede quantidade_matricula quantidade_matricula_aee\n",
+ "259 2019 AC Pública 10782 6395\n",
+ "260 2019 AC Privada 325 261\n",
+ "295 2019 AL Pública 21838 10145\n",
+ "296 2019 AL Privada 1314 1832\n",
+ "262 2019 AM Pública 17216 5074\n",
+ ".. ... ... ... ... ...\n",
+ "47 2022 SE Privada 1604 227\n",
+ "61 2022 SP Pública 191708 68563\n",
+ "62 2022 SP Privada 52146 942\n",
+ "22 2022 TO Pública 15977 7222\n",
+ "23 2022 TO Privada 567 28\n",
+ "\n",
+ "[216 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = os.path.join(\n",
+ " OUTPUT, \"educacao_especial_aee_matricula\"\n",
+ " )\n",
+ "\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb
new file mode 100644
index 00000000..2a9c4ba5
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb
@@ -0,0 +1,4066 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "os.makedirs(INPUT, exist_ok=True)\n",
+ "os.makedirs(OUTPUT, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 3) -> pd.DataFrame:\n",
+ " return pd.read_excel(\n",
+ " os.path.join(\n",
+ " INPUT,\n",
+ " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n",
+ " ),\n",
+ " skiprows=skiprows,\n",
+ " sheet_name=sheet_name\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "excel_data = pd.ExcelFile(os.path.join(\n",
+ " INPUT,\n",
+ " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n",
+ " ))\n",
+ "\n",
+ "# Get the sheet names\n",
+ "sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = {\n",
+ " sheet_name: read_sheet(sheet_name)\n",
+ " for sheet_name in sheet_names\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'2007': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 72.2 60.6 \n",
+ " 1 Norte 76.1 69.7 \n",
+ " 2 Nordeste 74.4 64.7 \n",
+ " 3 Sudeste 69.3 56.5 \n",
+ " 4 Sul 73.3 59.6 \n",
+ " 5 Centro-Oeste 78.7 73.6 \n",
+ " 6 Rondônia 72.8 65.8 \n",
+ " 7 Acre 67.2 60.0 \n",
+ " 8 Amazonas 76.3 75.7 \n",
+ " 9 Roraima 71.0 69.5 \n",
+ " 10 Pará 78.2 68.4 \n",
+ " 11 Amapá 76.4 81.0 \n",
+ " 12 Tocantins 75.4 70.7 \n",
+ " 13 Maranhão 74.2 61.2 \n",
+ " 14 Piauí 83.0 69.3 \n",
+ " 15 Ceará 67.8 56.9 \n",
+ " 16 Rio Grande do Norte 66.4 74.0 \n",
+ " 17 Paraíba 73.3 71.6 \n",
+ " 18 Pernambuco 78.6 64.5 \n",
+ " 19 Alagoas 72.0 79.2 \n",
+ " 20 Sergipe 85.0 85.4 \n",
+ " 21 Bahia 76.7 66.5 \n",
+ " 22 Minas Gerais 79.0 69.1 \n",
+ " 23 Espírito Santo 67.5 69.0 \n",
+ " 24 Rio de Janeiro 85.1 77.2 \n",
+ " 25 São Paulo 60.7 50.2 \n",
+ " 26 Paraná 72.3 54.2 \n",
+ " 27 Santa Catarina 75.8 65.3 \n",
+ " 28 Rio Grande do Sul 73.6 66.1 \n",
+ " 29 Mato Grosso do Sul 86.0 81.2 \n",
+ " 30 Mato Grosso 76.3 66.2 \n",
+ " 31 Goiás 76.8 75.7 \n",
+ " 32 Distrito Federal 74.6 70.6 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 65.3 \n",
+ " 1 75.7 \n",
+ " 2 77.9 \n",
+ " 3 58.4 \n",
+ " 4 62.6 \n",
+ " 5 73.6 \n",
+ " 6 76.2 \n",
+ " 7 60.5 \n",
+ " 8 75.5 \n",
+ " 9 76.9 \n",
+ " 10 77.7 \n",
+ " 11 69.4 \n",
+ " 12 75.9 \n",
+ " 13 76.2 \n",
+ " 14 80.5 \n",
+ " 15 83.7 \n",
+ " 16 78.0 \n",
+ " 17 88.0 \n",
+ " 18 81.3 \n",
+ " 19 88.3 \n",
+ " 20 85.7 \n",
+ " 21 67.1 \n",
+ " 22 69.1 \n",
+ " 23 69.1 \n",
+ " 24 86.7 \n",
+ " 25 51.5 \n",
+ " 26 59.5 \n",
+ " 27 71.9 \n",
+ " 28 60.4 \n",
+ " 29 75.4 \n",
+ " 30 78.1 \n",
+ " 31 75.5 \n",
+ " 32 69.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2008': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 66.5 54.5 \n",
+ " 1 Norte 61.2 44.8 \n",
+ " 2 Nordeste 60.7 45.7 \n",
+ " 3 Sudeste 65.7 53.5 \n",
+ " 4 Sul 73.5 63.2 \n",
+ " 5 Centro-Oeste 72.3 63.6 \n",
+ " 6 Rondônia 55.1 35.9 \n",
+ " 7 Acre 43.8 29.7 \n",
+ " 8 Amazonas 58.0 57.0 \n",
+ " 9 Roraima 44.2 28.4 \n",
+ " 10 Pará 65.1 48.3 \n",
+ " 11 Amapá 57.0 54.4 \n",
+ " 12 Tocantins 67.1 46.3 \n",
+ " 13 Maranhão 62.8 44.3 \n",
+ " 14 Piauí 66.3 57.9 \n",
+ " 15 Ceará 55.9 38.6 \n",
+ " 16 Rio Grande do Norte 47.1 45.2 \n",
+ " 17 Paraíba 54.0 48.1 \n",
+ " 18 Pernambuco 73.5 58.6 \n",
+ " 19 Alagoas 49.0 46.1 \n",
+ " 20 Sergipe 68.5 58.9 \n",
+ " 21 Bahia 60.5 44.4 \n",
+ " 22 Minas Gerais 73.1 61.8 \n",
+ " 23 Espírito Santo 62.9 61.2 \n",
+ " 24 Rio de Janeiro 82.7 72.7 \n",
+ " 25 São Paulo 57.5 48.2 \n",
+ " 26 Paraná 78.0 60.6 \n",
+ " 27 Santa Catarina 69.5 64.5 \n",
+ " 28 Rio Grande do Sul 70.1 65.0 \n",
+ " 29 Mato Grosso do Sul 84.6 75.7 \n",
+ " 30 Mato Grosso 72.6 66.2 \n",
+ " 31 Goiás 68.0 62.1 \n",
+ " 32 Distrito Federal 62.0 60.0 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 57.3 \n",
+ " 1 50.7 \n",
+ " 2 49.4 \n",
+ " 3 56.6 \n",
+ " 4 65.0 \n",
+ " 5 60.5 \n",
+ " 6 48.8 \n",
+ " 7 47.2 \n",
+ " 8 62.2 \n",
+ " 9 41.2 \n",
+ " 10 48.7 \n",
+ " 11 60.0 \n",
+ " 12 48.2 \n",
+ " 13 45.3 \n",
+ " 14 51.0 \n",
+ " 15 48.7 \n",
+ " 16 44.2 \n",
+ " 17 52.3 \n",
+ " 18 65.1 \n",
+ " 19 61.1 \n",
+ " 20 65.7 \n",
+ " 21 41.9 \n",
+ " 22 58.6 \n",
+ " 23 73.5 \n",
+ " 24 80.2 \n",
+ " 25 51.5 \n",
+ " 26 59.6 \n",
+ " 27 72.5 \n",
+ " 28 64.0 \n",
+ " 29 80.0 \n",
+ " 30 72.7 \n",
+ " 31 55.8 \n",
+ " 32 60.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2009': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 68.4 54.8 \n",
+ " 1 Norte 69.8 51.0 \n",
+ " 2 Nordeste 66.9 50.7 \n",
+ " 3 Sudeste 65.6 52.1 \n",
+ " 4 Sul 75.8 63.8 \n",
+ " 5 Centro-Oeste 73.1 65.6 \n",
+ " 6 Rondônia 64.2 47.3 \n",
+ " 7 Acre 51.6 37.8 \n",
+ " 8 Amazonas 70.6 56.7 \n",
+ " 9 Roraima 50.6 51.7 \n",
+ " 10 Pará 74.2 54.0 \n",
+ " 11 Amapá 64.5 49.2 \n",
+ " 12 Tocantins 72.8 50.5 \n",
+ " 13 Maranhão 64.4 48.8 \n",
+ " 14 Piauí 70.6 47.2 \n",
+ " 15 Ceará 64.3 44.5 \n",
+ " 16 Rio Grande do Norte 53.4 57.9 \n",
+ " 17 Paraíba 70.4 47.9 \n",
+ " 18 Pernambuco 78.3 64.5 \n",
+ " 19 Alagoas 54.1 58.9 \n",
+ " 20 Sergipe 78.2 61.1 \n",
+ " 21 Bahia 65.5 50.2 \n",
+ " 22 Minas Gerais 75.4 64.4 \n",
+ " 23 Espírito Santo 69.9 60.2 \n",
+ " 24 Rio de Janeiro 84.6 74.3 \n",
+ " 25 São Paulo 57.0 47.3 \n",
+ " 26 Paraná 80.6 61.1 \n",
+ " 27 Santa Catarina 57.4 61.8 \n",
+ " 28 Rio Grande do Sul 75.0 67.9 \n",
+ " 29 Mato Grosso do Sul 84.1 78.9 \n",
+ " 30 Mato Grosso 73.2 69.7 \n",
+ " 31 Goiás 65.7 61.2 \n",
+ " 32 Distrito Federal 66.4 64.6 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.0 \n",
+ " 1 57.5 \n",
+ " 2 56.6 \n",
+ " 3 52.1 \n",
+ " 4 62.1 \n",
+ " 5 62.2 \n",
+ " 6 45.1 \n",
+ " 7 43.4 \n",
+ " 8 61.4 \n",
+ " 9 21.4 \n",
+ " 10 62.1 \n",
+ " 11 54.7 \n",
+ " 12 55.2 \n",
+ " 13 48.2 \n",
+ " 14 61.4 \n",
+ " 15 52.6 \n",
+ " 16 51.1 \n",
+ " 17 50.2 \n",
+ " 18 69.3 \n",
+ " 19 64.1 \n",
+ " 20 68.9 \n",
+ " 21 54.4 \n",
+ " 22 59.9 \n",
+ " 23 69.9 \n",
+ " 24 74.8 \n",
+ " 25 46.0 \n",
+ " 26 57.4 \n",
+ " 27 57.4 \n",
+ " 28 70.3 \n",
+ " 29 80.6 \n",
+ " 30 70.9 \n",
+ " 31 56.2 \n",
+ " 32 67.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2010': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 67.5 57.2 \n",
+ " 1 Norte 70.0 59.4 \n",
+ " 2 Nordeste 67.1 57.8 \n",
+ " 3 Sudeste 65.0 52.8 \n",
+ " 4 Sul 72.4 62.8 \n",
+ " 5 Centro-Oeste 68.6 67.2 \n",
+ " 6 Rondônia 67.5 60.7 \n",
+ " 7 Acre 57.2 49.4 \n",
+ " 8 Amazonas 70.7 64.2 \n",
+ " 9 Roraima 50.6 59.2 \n",
+ " 10 Pará 74.9 62.5 \n",
+ " 11 Amapá 66.7 54.7 \n",
+ " 12 Tocantins 68.6 57.5 \n",
+ " 13 Maranhão 65.2 55.5 \n",
+ " 14 Piauí 69.2 56.1 \n",
+ " 15 Ceará 64.2 50.7 \n",
+ " 16 Rio Grande do Norte 56.2 66.8 \n",
+ " 17 Paraíba 71.7 56.5 \n",
+ " 18 Pernambuco 74.9 65.6 \n",
+ " 19 Alagoas 57.6 60.2 \n",
+ " 20 Sergipe 78.2 72.2 \n",
+ " 21 Bahia 67.4 59.3 \n",
+ " 22 Minas Gerais 69.3 66.1 \n",
+ " 23 Espírito Santo 59.1 59.0 \n",
+ " 24 Rio de Janeiro 82.2 73.3 \n",
+ " 25 São Paulo 58.4 47.4 \n",
+ " 26 Paraná 76.0 59.0 \n",
+ " 27 Santa Catarina 54.1 62.1 \n",
+ " 28 Rio Grande do Sul 73.0 67.8 \n",
+ " 29 Mato Grosso do Sul 81.2 78.7 \n",
+ " 30 Mato Grosso 64.6 66.7 \n",
+ " 31 Goiás 62.2 66.2 \n",
+ " 32 Distrito Federal 62.7 61.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.4 \n",
+ " 1 63.0 \n",
+ " 2 60.3 \n",
+ " 3 52.1 \n",
+ " 4 55.6 \n",
+ " 5 64.6 \n",
+ " 6 45.9 \n",
+ " 7 56.6 \n",
+ " 8 70.9 \n",
+ " 9 58.6 \n",
+ " 10 70.8 \n",
+ " 11 54.1 \n",
+ " 12 56.0 \n",
+ " 13 56.7 \n",
+ " 14 65.9 \n",
+ " 15 50.6 \n",
+ " 16 67.4 \n",
+ " 17 55.6 \n",
+ " 18 69.9 \n",
+ " 19 69.1 \n",
+ " 20 80.1 \n",
+ " 21 57.6 \n",
+ " 22 58.9 \n",
+ " 23 60.1 \n",
+ " 24 67.7 \n",
+ " 25 47.3 \n",
+ " 26 51.1 \n",
+ " 27 53.0 \n",
+ " 28 62.9 \n",
+ " 29 75.6 \n",
+ " 30 69.6 \n",
+ " 31 63.2 \n",
+ " 32 61.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2011': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 66.5 58.5 \n",
+ " 1 Norte 69.1 63.2 \n",
+ " 2 Nordeste 66.9 61.9 \n",
+ " 3 Sudeste 64.7 53.6 \n",
+ " 4 Sul 69.1 60.6 \n",
+ " 5 Centro-Oeste 66.1 66.2 \n",
+ " 6 Rondônia 67.5 61.4 \n",
+ " 7 Acre 57.1 53.4 \n",
+ " 8 Amazonas 69.7 69.5 \n",
+ " 9 Roraima 49.4 65.6 \n",
+ " 10 Pará 73.4 66.5 \n",
+ " 11 Amapá 69.8 55.4 \n",
+ " 12 Tocantins 67.2 62.8 \n",
+ " 13 Maranhão 64.8 60.2 \n",
+ " 14 Piauí 67.1 61.0 \n",
+ " 15 Ceará 62.1 56.8 \n",
+ " 16 Rio Grande do Norte 57.6 68.9 \n",
+ " 17 Paraíba 71.2 60.8 \n",
+ " 18 Pernambuco 74.3 67.4 \n",
+ " 19 Alagoas 60.4 64.5 \n",
+ " 20 Sergipe 78.4 73.3 \n",
+ " 21 Bahia 67.7 61.6 \n",
+ " 22 Minas Gerais 65.1 65.3 \n",
+ " 23 Espírito Santo 54.4 58.0 \n",
+ " 24 Rio de Janeiro 79.6 72.8 \n",
+ " 25 São Paulo 60.4 47.5 \n",
+ " 26 Paraná 73.5 56.1 \n",
+ " 27 Santa Catarina 50.3 60.1 \n",
+ " 28 Rio Grande do Sul 69.8 66.4 \n",
+ " 29 Mato Grosso do Sul 78.4 79.3 \n",
+ " 30 Mato Grosso 58.8 62.5 \n",
+ " 31 Goiás 60.5 65.1 \n",
+ " 32 Distrito Federal 63.0 61.0 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 56.1 \n",
+ " 1 62.9 \n",
+ " 2 59.9 \n",
+ " 3 52.5 \n",
+ " 4 54.3 \n",
+ " 5 62.5 \n",
+ " 6 51.0 \n",
+ " 7 51.2 \n",
+ " 8 68.9 \n",
+ " 9 48.1 \n",
+ " 10 69.1 \n",
+ " 11 57.7 \n",
+ " 12 62.9 \n",
+ " 13 54.6 \n",
+ " 14 63.0 \n",
+ " 15 57.7 \n",
+ " 16 68.8 \n",
+ " 17 50.5 \n",
+ " 18 65.9 \n",
+ " 19 62.8 \n",
+ " 20 73.5 \n",
+ " 21 57.1 \n",
+ " 22 59.8 \n",
+ " 23 59.1 \n",
+ " 24 66.3 \n",
+ " 25 47.4 \n",
+ " 26 50.7 \n",
+ " 27 51.3 \n",
+ " 28 61.2 \n",
+ " 29 68.9 \n",
+ " 30 64.6 \n",
+ " 31 63.8 \n",
+ " 32 57.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2012': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 64.4 58.9 \n",
+ " 1 Norte 67.0 64.7 \n",
+ " 2 Nordeste 66.2 63.7 \n",
+ " 3 Sudeste 62.0 53.1 \n",
+ " 4 Sul 67.1 61.4 \n",
+ " 5 Centro-Oeste 62.5 64.3 \n",
+ " 6 Rondônia 67.0 60.8 \n",
+ " 7 Acre 54.5 58.5 \n",
+ " 8 Amazonas 66.0 72.1 \n",
+ " 9 Roraima 43.5 62.5 \n",
+ " 10 Pará 70.2 67.7 \n",
+ " 11 Amapá 68.3 61.9 \n",
+ " 12 Tocantins 68.3 62.9 \n",
+ " 13 Maranhão 62.6 60.3 \n",
+ " 14 Piauí 68.5 61.3 \n",
+ " 15 Ceará 57.8 59.4 \n",
+ " 16 Rio Grande do Norte 57.6 69.9 \n",
+ " 17 Paraíba 69.3 61.9 \n",
+ " 18 Pernambuco 74.2 69.6 \n",
+ " 19 Alagoas 61.7 66.2 \n",
+ " 20 Sergipe 79.5 75.4 \n",
+ " 21 Bahia 68.0 64.0 \n",
+ " 22 Minas Gerais 57.8 63.3 \n",
+ " 23 Espírito Santo 50.4 59.4 \n",
+ " 24 Rio de Janeiro 78.8 71.8 \n",
+ " 25 São Paulo 59.6 46.6 \n",
+ " 26 Paraná 73.8 56.9 \n",
+ " 27 Santa Catarina 41.5 59.9 \n",
+ " 28 Rio Grande do Sul 67.0 67.6 \n",
+ " 29 Mato Grosso do Sul 77.2 78.6 \n",
+ " 30 Mato Grosso 47.1 57.6 \n",
+ " 31 Goiás 56.6 63.0 \n",
+ " 32 Distrito Federal 62.6 59.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 55.3 \n",
+ " 1 62.9 \n",
+ " 2 60.9 \n",
+ " 3 51.4 \n",
+ " 4 51.8 \n",
+ " 5 61.5 \n",
+ " 6 53.9 \n",
+ " 7 52.0 \n",
+ " 8 66.0 \n",
+ " 9 57.5 \n",
+ " 10 70.9 \n",
+ " 11 52.5 \n",
+ " 12 61.0 \n",
+ " 13 57.8 \n",
+ " 14 58.3 \n",
+ " 15 59.0 \n",
+ " 16 67.7 \n",
+ " 17 52.1 \n",
+ " 18 67.1 \n",
+ " 19 63.5 \n",
+ " 20 73.7 \n",
+ " 21 59.8 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 60.5 \n",
+ " 25 46.6 \n",
+ " 26 46.9 \n",
+ " 27 49.9 \n",
+ " 28 59.4 \n",
+ " 29 69.8 \n",
+ " 30 57.5 \n",
+ " 31 64.7 \n",
+ " 32 53.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2013': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 61.8 59.3 \n",
+ " 1 Norte 65.1 65.2 \n",
+ " 2 Nordeste 63.8 66.2 \n",
+ " 3 Sudeste 59.5 52.2 \n",
+ " 4 Sul 63.0 63.8 \n",
+ " 5 Centro-Oeste 60.1 61.6 \n",
+ " 6 Rondônia 66.3 62.2 \n",
+ " 7 Acre 55.8 56.4 \n",
+ " 8 Amazonas 63.2 66.0 \n",
+ " 9 Roraima 38.4 58.2 \n",
+ " 10 Pará 69.1 69.9 \n",
+ " 11 Amapá 64.4 60.6 \n",
+ " 12 Tocantins 63.3 66.0 \n",
+ " 13 Maranhão 60.5 61.9 \n",
+ " 14 Piauí 66.0 66.3 \n",
+ " 15 Ceará 50.2 63.1 \n",
+ " 16 Rio Grande do Norte 56.4 72.8 \n",
+ " 17 Paraíba 68.0 67.2 \n",
+ " 18 Pernambuco 73.1 71.1 \n",
+ " 19 Alagoas 61.3 65.7 \n",
+ " 20 Sergipe 76.4 77.6 \n",
+ " 21 Bahia 66.8 65.8 \n",
+ " 22 Minas Gerais 52.4 60.5 \n",
+ " 23 Espírito Santo 47.8 61.8 \n",
+ " 24 Rio de Janeiro 77.1 74.3 \n",
+ " 25 São Paulo 58.4 44.8 \n",
+ " 26 Paraná 70.7 59.6 \n",
+ " 27 Santa Catarina 35.1 59.7 \n",
+ " 28 Rio Grande do Sul 62.8 70.3 \n",
+ " 29 Mato Grosso do Sul 76.4 75.2 \n",
+ " 30 Mato Grosso 41.9 47.7 \n",
+ " 31 Goiás 53.5 64.7 \n",
+ " 32 Distrito Federal 63.5 58.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 55.4 \n",
+ " 1 62.0 \n",
+ " 2 61.9 \n",
+ " 3 51.0 \n",
+ " 4 52.9 \n",
+ " 5 61.4 \n",
+ " 6 54.1 \n",
+ " 7 48.6 \n",
+ " 8 63.9 \n",
+ " 9 60.3 \n",
+ " 10 70.6 \n",
+ " 11 54.4 \n",
+ " 12 60.4 \n",
+ " 13 57.8 \n",
+ " 14 63.2 \n",
+ " 15 60.9 \n",
+ " 16 65.5 \n",
+ " 17 56.9 \n",
+ " 18 71.0 \n",
+ " 19 55.5 \n",
+ " 20 72.9 \n",
+ " 21 60.0 \n",
+ " 22 61.1 \n",
+ " 23 51.7 \n",
+ " 24 63.1 \n",
+ " 25 46.1 \n",
+ " 26 48.2 \n",
+ " 27 51.8 \n",
+ " 28 59.4 \n",
+ " 29 67.8 \n",
+ " 30 60.5 \n",
+ " 31 65.1 \n",
+ " 32 53.9 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2014': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 58.7 59.6 \n",
+ " 1 Norte 60.7 65.6 \n",
+ " 2 Nordeste 60.4 68.1 \n",
+ " 3 Sudeste 56.4 51.9 \n",
+ " 4 Sul 61.2 64.5 \n",
+ " 5 Centro-Oeste 56.0 58.6 \n",
+ " 6 Rondônia 57.7 63.7 \n",
+ " 7 Acre 53.6 55.5 \n",
+ " 8 Amazonas 61.6 67.2 \n",
+ " 9 Roraima 35.5 54.8 \n",
+ " 10 Pará 64.8 72.0 \n",
+ " 11 Amapá 60.4 60.3 \n",
+ " 12 Tocantins 56.9 65.2 \n",
+ " 13 Maranhão 59.0 63.6 \n",
+ " 14 Piauí 62.1 68.2 \n",
+ " 15 Ceará 42.5 63.9 \n",
+ " 16 Rio Grande do Norte 52.5 74.8 \n",
+ " 17 Paraíba 65.0 69.4 \n",
+ " 18 Pernambuco 71.2 72.7 \n",
+ " 19 Alagoas 60.1 69.4 \n",
+ " 20 Sergipe 71.9 81.3 \n",
+ " 21 Bahia 64.4 67.9 \n",
+ " 22 Minas Gerais 46.0 56.6 \n",
+ " 23 Espírito Santo 45.6 63.0 \n",
+ " 24 Rio de Janeiro 75.3 73.7 \n",
+ " 25 São Paulo 56.1 44.9 \n",
+ " 26 Paraná 73.2 61.2 \n",
+ " 27 Santa Catarina 30.0 57.5 \n",
+ " 28 Rio Grande do Sul 58.9 70.9 \n",
+ " 29 Mato Grosso do Sul 75.1 73.4 \n",
+ " 30 Mato Grosso 35.0 37.8 \n",
+ " 31 Goiás 46.7 62.0 \n",
+ " 32 Distrito Federal 62.9 61.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 54.5 \n",
+ " 1 58.9 \n",
+ " 2 64.7 \n",
+ " 3 49.3 \n",
+ " 4 52.4 \n",
+ " 5 61.0 \n",
+ " 6 52.9 \n",
+ " 7 49.4 \n",
+ " 8 67.0 \n",
+ " 9 64.4 \n",
+ " 10 63.8 \n",
+ " 11 57.8 \n",
+ " 12 60.8 \n",
+ " 13 61.1 \n",
+ " 14 63.6 \n",
+ " 15 62.2 \n",
+ " 16 69.0 \n",
+ " 17 60.3 \n",
+ " 18 68.6 \n",
+ " 19 61.3 \n",
+ " 20 74.1 \n",
+ " 21 67.8 \n",
+ " 22 60.5 \n",
+ " 23 53.9 \n",
+ " 24 63.1 \n",
+ " 25 43.5 \n",
+ " 26 48.4 \n",
+ " 27 50.4 \n",
+ " 28 59.0 \n",
+ " 29 66.4 \n",
+ " 30 58.7 \n",
+ " 31 64.4 \n",
+ " 32 54.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2015': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 55.8 59.2 \n",
+ " 1 Norte 57.8 66.0 \n",
+ " 2 Nordeste 57.2 67.6 \n",
+ " 3 Sudeste 53.8 51.1 \n",
+ " 4 Sul 57.6 63.6 \n",
+ " 5 Centro-Oeste 53.3 56.2 \n",
+ " 6 Rondônia 53.0 62.4 \n",
+ " 7 Acre 50.5 54.1 \n",
+ " 8 Amazonas 59.2 65.1 \n",
+ " 9 Roraima 32.1 54.7 \n",
+ " 10 Pará 62.8 73.7 \n",
+ " 11 Amapá 55.7 61.7 \n",
+ " 12 Tocantins 53.1 64.9 \n",
+ " 13 Maranhão 55.3 63.7 \n",
+ " 14 Piauí 60.1 67.5 \n",
+ " 15 Ceará 36.3 60.4 \n",
+ " 16 Rio Grande do Norte 49.0 74.2 \n",
+ " 17 Paraíba 62.9 70.4 \n",
+ " 18 Pernambuco 69.3 73.1 \n",
+ " 19 Alagoas 58.1 69.9 \n",
+ " 20 Sergipe 68.2 81.0 \n",
+ " 21 Bahia 62.7 68.6 \n",
+ " 22 Minas Gerais 41.7 53.1 \n",
+ " 23 Espírito Santo 43.0 62.6 \n",
+ " 24 Rio de Janeiro 72.9 72.3 \n",
+ " 25 São Paulo 54.5 44.4 \n",
+ " 26 Paraná 69.7 61.2 \n",
+ " 27 Santa Catarina 26.5 54.1 \n",
+ " 28 Rio Grande do Sul 56.0 70.5 \n",
+ " 29 Mato Grosso do Sul 73.1 69.4 \n",
+ " 30 Mato Grosso 30.8 34.0 \n",
+ " 31 Goiás 42.7 60.6 \n",
+ " 32 Distrito Federal 62.1 59.7 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.7 \n",
+ " 1 58.4 \n",
+ " 2 64.4 \n",
+ " 3 48.3 \n",
+ " 4 52.0 \n",
+ " 5 58.3 \n",
+ " 6 53.3 \n",
+ " 7 49.6 \n",
+ " 8 68.1 \n",
+ " 9 54.5 \n",
+ " 10 65.6 \n",
+ " 11 55.5 \n",
+ " 12 58.9 \n",
+ " 13 61.0 \n",
+ " 14 65.6 \n",
+ " 15 62.6 \n",
+ " 16 69.1 \n",
+ " 17 60.4 \n",
+ " 18 66.0 \n",
+ " 19 57.8 \n",
+ " 20 73.7 \n",
+ " 21 67.6 \n",
+ " 22 60.9 \n",
+ " 23 54.2 \n",
+ " 24 62.0 \n",
+ " 25 41.7 \n",
+ " 26 48.0 \n",
+ " 27 50.1 \n",
+ " 28 58.0 \n",
+ " 29 67.2 \n",
+ " 30 52.3 \n",
+ " 31 61.6 \n",
+ " 32 52.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2016': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 53.5 58.2 \n",
+ " 1 Norte 55.7 65.6 \n",
+ " 2 Nordeste 54.3 66.9 \n",
+ " 3 Sudeste 51.4 49.7 \n",
+ " 4 Sul 56.9 61.2 \n",
+ " 5 Centro-Oeste 51.0 55.3 \n",
+ " 6 Rondônia 47.2 62.7 \n",
+ " 7 Acre 46.8 55.5 \n",
+ " 8 Amazonas 57.7 63.3 \n",
+ " 9 Roraima 28.9 52.9 \n",
+ " 10 Pará 61.3 73.2 \n",
+ " 11 Amapá 54.4 62.0 \n",
+ " 12 Tocantins 52.5 62.7 \n",
+ " 13 Maranhão 53.1 64.2 \n",
+ " 14 Piauí 57.3 67.5 \n",
+ " 15 Ceará 30.9 55.6 \n",
+ " 16 Rio Grande do Norte 45.4 72.9 \n",
+ " 17 Paraíba 60.3 71.4 \n",
+ " 18 Pernambuco 65.8 74.0 \n",
+ " 19 Alagoas 56.1 70.7 \n",
+ " 20 Sergipe 66.1 79.4 \n",
+ " 21 Bahia 61.4 69.9 \n",
+ " 22 Minas Gerais 39.3 50.9 \n",
+ " 23 Espírito Santo 40.7 59.8 \n",
+ " 24 Rio de Janeiro 70.1 70.2 \n",
+ " 25 São Paulo 51.9 43.3 \n",
+ " 26 Paraná 69.5 59.3 \n",
+ " 27 Santa Catarina 24.5 47.9 \n",
+ " 28 Rio Grande do Sul 54.9 68.4 \n",
+ " 29 Mato Grosso do Sul 71.9 67.3 \n",
+ " 30 Mato Grosso 30.0 31.1 \n",
+ " 31 Goiás 40.0 60.1 \n",
+ " 32 Distrito Federal 59.7 59.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.9 \n",
+ " 1 60.4 \n",
+ " 2 64.6 \n",
+ " 3 47.8 \n",
+ " 4 53.6 \n",
+ " 5 57.3 \n",
+ " 6 54.4 \n",
+ " 7 51.1 \n",
+ " 8 67.3 \n",
+ " 9 51.1 \n",
+ " 10 69.7 \n",
+ " 11 55.5 \n",
+ " 12 60.9 \n",
+ " 13 58.7 \n",
+ " 14 63.5 \n",
+ " 15 64.1 \n",
+ " 16 68.3 \n",
+ " 17 65.5 \n",
+ " 18 65.4 \n",
+ " 19 64.6 \n",
+ " 20 72.0 \n",
+ " 21 68.0 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 58.3 \n",
+ " 25 41.2 \n",
+ " 26 49.1 \n",
+ " 27 50.7 \n",
+ " 28 61.0 \n",
+ " 29 68.0 \n",
+ " 30 49.8 \n",
+ " 31 61.6 \n",
+ " 32 50.8 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2017': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 51.4 56.8 \n",
+ " 1 Norte 53.1 64.8 \n",
+ " 2 Nordeste 52.2 65.3 \n",
+ " 3 Sudeste 49.4 48.3 \n",
+ " 4 Sul 54.6 59.2 \n",
+ " 5 Centro-Oeste 48.2 52.5 \n",
+ " 6 Rondônia 42.7 60.5 \n",
+ " 7 Acre 45.1 54.0 \n",
+ " 8 Amazonas 54.5 62.9 \n",
+ " 9 Roraima 26.2 50.3 \n",
+ " 10 Pará 59.9 72.7 \n",
+ " 11 Amapá 53.8 60.4 \n",
+ " 12 Tocantins 44.1 61.6 \n",
+ " 13 Maranhão 50.5 63.3 \n",
+ " 14 Piauí 54.4 63.7 \n",
+ " 15 Ceará 28.4 50.8 \n",
+ " 16 Rio Grande do Norte 43.2 69.6 \n",
+ " 17 Paraíba 59.1 71.3 \n",
+ " 18 Pernambuco 62.6 73.2 \n",
+ " 19 Alagoas 55.1 69.8 \n",
+ " 20 Sergipe 62.3 78.4 \n",
+ " 21 Bahia 59.6 70.4 \n",
+ " 22 Minas Gerais 36.8 49.4 \n",
+ " 23 Espírito Santo 37.9 58.2 \n",
+ " 24 Rio de Janeiro 68.0 67.8 \n",
+ " 25 São Paulo 50.8 41.2 \n",
+ " 26 Paraná 66.0 58.0 \n",
+ " 27 Santa Catarina 23.0 44.4 \n",
+ " 28 Rio Grande do Sul 53.4 66.9 \n",
+ " 29 Mato Grosso do Sul 71.5 68.3 \n",
+ " 30 Mato Grosso 29.3 33.3 \n",
+ " 31 Goiás 37.3 54.6 \n",
+ " 32 Distrito Federal 56.8 56.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.1 \n",
+ " 1 59.9 \n",
+ " 2 61.8 \n",
+ " 3 46.7 \n",
+ " 4 54.9 \n",
+ " 5 55.5 \n",
+ " 6 52.2 \n",
+ " 7 48.1 \n",
+ " 8 64.1 \n",
+ " 9 51.2 \n",
+ " 10 69.8 \n",
+ " 11 54.6 \n",
+ " 12 59.5 \n",
+ " 13 56.2 \n",
+ " 14 59.9 \n",
+ " 15 60.8 \n",
+ " 16 67.9 \n",
+ " 17 66.9 \n",
+ " 18 65.2 \n",
+ " 19 52.6 \n",
+ " 20 75.2 \n",
+ " 21 65.7 \n",
+ " 22 57.4 \n",
+ " 23 51.5 \n",
+ " 24 58.6 \n",
+ " 25 39.6 \n",
+ " 26 51.2 \n",
+ " 27 50.5 \n",
+ " 28 62.0 \n",
+ " 29 65.6 \n",
+ " 30 46.4 \n",
+ " 31 60.5 \n",
+ " 32 50.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2018': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 48.4 55.0 \n",
+ " 1 Norte 49.9 62.6 \n",
+ " 2 Nordeste 48.9 63.4 \n",
+ " 3 Sudeste 46.6 46.8 \n",
+ " 4 Sul 52.5 57.0 \n",
+ " 5 Centro-Oeste 44.1 49.8 \n",
+ " 6 Rondônia 36.6 58.4 \n",
+ " 7 Acre 41.4 51.5 \n",
+ " 8 Amazonas 51.6 60.7 \n",
+ " 9 Roraima 25.2 45.0 \n",
+ " 10 Pará 57.7 70.7 \n",
+ " 11 Amapá 51.3 60.2 \n",
+ " 12 Tocantins 38.7 57.7 \n",
+ " 13 Maranhão 47.7 61.2 \n",
+ " 14 Piauí 50.9 62.2 \n",
+ " 15 Ceará 24.6 46.0 \n",
+ " 16 Rio Grande do Norte 41.0 67.5 \n",
+ " 17 Paraíba 57.0 70.7 \n",
+ " 18 Pernambuco 57.0 70.8 \n",
+ " 19 Alagoas 50.1 68.2 \n",
+ " 20 Sergipe 60.8 76.4 \n",
+ " 21 Bahia 56.8 69.9 \n",
+ " 22 Minas Gerais 34.0 48.1 \n",
+ " 23 Espírito Santo 34.5 55.4 \n",
+ " 24 Rio de Janeiro 64.7 66.6 \n",
+ " 25 São Paulo 48.3 38.8 \n",
+ " 26 Paraná 63.0 55.2 \n",
+ " 27 Santa Catarina 21.7 41.4 \n",
+ " 28 Rio Grande do Sul 51.9 65.4 \n",
+ " 29 Mato Grosso do Sul 67.4 67.6 \n",
+ " 30 Mato Grosso 27.9 34.6 \n",
+ " 31 Goiás 32.9 50.0 \n",
+ " 32 Distrito Federal 54.3 53.8 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 53.2 \n",
+ " 1 61.0 \n",
+ " 2 61.2 \n",
+ " 3 47.0 \n",
+ " 4 54.1 \n",
+ " 5 53.2 \n",
+ " 6 51.2 \n",
+ " 7 48.4 \n",
+ " 8 61.4 \n",
+ " 9 52.5 \n",
+ " 10 70.6 \n",
+ " 11 56.1 \n",
+ " 12 58.8 \n",
+ " 13 56.3 \n",
+ " 14 55.7 \n",
+ " 15 57.4 \n",
+ " 16 67.7 \n",
+ " 17 66.1 \n",
+ " 18 63.2 \n",
+ " 19 60.0 \n",
+ " 20 75.8 \n",
+ " 21 63.9 \n",
+ " 22 55.4 \n",
+ " 23 52.4 \n",
+ " 24 60.3 \n",
+ " 25 38.8 \n",
+ " 26 49.9 \n",
+ " 27 47.4 \n",
+ " 28 63.0 \n",
+ " 29 62.6 \n",
+ " 30 43.8 \n",
+ " 31 58.2 \n",
+ " 32 47.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2019': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 44.9 53.3 \n",
+ " 1 Norte 46.8 60.9 \n",
+ " 2 Nordeste 45.1 61.3 \n",
+ " 3 Sudeste 42.6 45.2 \n",
+ " 4 Sul 50.1 55.0 \n",
+ " 5 Centro-Oeste 41.1 48.3 \n",
+ " 6 Rondônia 30.9 52.9 \n",
+ " 7 Acre 36.9 49.6 \n",
+ " 8 Amazonas 49.0 59.5 \n",
+ " 9 Roraima 24.8 43.4 \n",
+ " 10 Pará 55.1 69.7 \n",
+ " 11 Amapá 46.4 59.8 \n",
+ " 12 Tocantins 35.5 55.8 \n",
+ " 13 Maranhão 44.7 59.9 \n",
+ " 14 Piauí 45.6 62.2 \n",
+ " 15 Ceará 21.6 40.5 \n",
+ " 16 Rio Grande do Norte 37.6 64.8 \n",
+ " 17 Paraíba 53.8 70.6 \n",
+ " 18 Pernambuco 51.2 68.4 \n",
+ " 19 Alagoas 45.0 65.0 \n",
+ " 20 Sergipe 57.4 75.1 \n",
+ " 21 Bahia 53.5 69.5 \n",
+ " 22 Minas Gerais 30.7 45.6 \n",
+ " 23 Espírito Santo 30.8 53.4 \n",
+ " 24 Rio de Janeiro 60.4 65.9 \n",
+ " 25 São Paulo 43.9 37.5 \n",
+ " 26 Paraná 61.6 51.5 \n",
+ " 27 Santa Catarina 20.0 38.7 \n",
+ " 28 Rio Grande do Sul 49.1 64.2 \n",
+ " 29 Mato Grosso do Sul 65.0 66.2 \n",
+ " 30 Mato Grosso 27.5 33.9 \n",
+ " 31 Goiás 30.2 47.7 \n",
+ " 32 Distrito Federal 44.8 50.5 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 51.1 \n",
+ " 1 59.3 \n",
+ " 2 59.4 \n",
+ " 3 44.7 \n",
+ " 4 50.9 \n",
+ " 5 51.4 \n",
+ " 6 48.3 \n",
+ " 7 46.7 \n",
+ " 8 62.3 \n",
+ " 9 47.7 \n",
+ " 10 68.5 \n",
+ " 11 51.7 \n",
+ " 12 56.8 \n",
+ " 13 54.8 \n",
+ " 14 55.8 \n",
+ " 15 52.0 \n",
+ " 16 66.1 \n",
+ " 17 64.5 \n",
+ " 18 65.5 \n",
+ " 19 59.3 \n",
+ " 20 74.6 \n",
+ " 21 63.1 \n",
+ " 22 50.7 \n",
+ " 23 50.2 \n",
+ " 24 59.5 \n",
+ " 25 36.9 \n",
+ " 26 45.2 \n",
+ " 27 43.0 \n",
+ " 28 59.4 \n",
+ " 29 59.2 \n",
+ " 30 43.9 \n",
+ " 31 56.0 \n",
+ " 32 45.1 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2020': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 41.7 51.6 \n",
+ " 1 Norte 44.0 58.8 \n",
+ " 2 Nordeste 40.9 59.3 \n",
+ " 3 Sudeste 39.3 43.8 \n",
+ " 4 Sul 48.2 53.7 \n",
+ " 5 Centro-Oeste 38.1 45.4 \n",
+ " 6 Rondônia 29.2 47.8 \n",
+ " 7 Acre 34.9 47.4 \n",
+ " 8 Amazonas 45.9 57.8 \n",
+ " 9 Roraima 25.2 37.0 \n",
+ " 10 Pará 52.0 68.6 \n",
+ " 11 Amapá 41.1 59.7 \n",
+ " 12 Tocantins 32.2 52.9 \n",
+ " 13 Maranhão 41.2 58.6 \n",
+ " 14 Piauí 39.8 58.2 \n",
+ " 15 Ceará 18.4 37.1 \n",
+ " 16 Rio Grande do Norte 33.4 62.5 \n",
+ " 17 Paraíba 50.3 68.4 \n",
+ " 18 Pernambuco 43.5 65.2 \n",
+ " 19 Alagoas 39.7 61.2 \n",
+ " 20 Sergipe 53.9 74.8 \n",
+ " 21 Bahia 50.6 68.8 \n",
+ " 22 Minas Gerais 28.2 44.7 \n",
+ " 23 Espírito Santo 27.9 51.2 \n",
+ " 24 Rio de Janeiro 55.4 64.5 \n",
+ " 25 São Paulo 40.7 35.4 \n",
+ " 26 Paraná 61.5 49.7 \n",
+ " 27 Santa Catarina 18.7 37.4 \n",
+ " 28 Rio Grande do Sul 46.0 63.8 \n",
+ " 29 Mato Grosso do Sul 61.7 65.3 \n",
+ " 30 Mato Grosso 24.5 35.0 \n",
+ " 31 Goiás 29.2 42.5 \n",
+ " 32 Distrito Federal 39.9 49.1 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 50.3 \n",
+ " 1 58.5 \n",
+ " 2 58.5 \n",
+ " 3 44.2 \n",
+ " 4 48.8 \n",
+ " 5 49.4 \n",
+ " 6 49.1 \n",
+ " 7 46.7 \n",
+ " 8 58.6 \n",
+ " 9 48.3 \n",
+ " 10 67.6 \n",
+ " 11 52.2 \n",
+ " 12 54.8 \n",
+ " 13 52.4 \n",
+ " 14 55.8 \n",
+ " 15 49.2 \n",
+ " 16 62.7 \n",
+ " 17 62.1 \n",
+ " 18 63.7 \n",
+ " 19 58.8 \n",
+ " 20 71.3 \n",
+ " 21 64.9 \n",
+ " 22 49.6 \n",
+ " 23 49.4 \n",
+ " 24 60.4 \n",
+ " 25 35.7 \n",
+ " 26 44.3 \n",
+ " 27 40.0 \n",
+ " 28 58.0 \n",
+ " 29 59.3 \n",
+ " 30 42.7 \n",
+ " 31 51.9 \n",
+ " 32 44.3 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2021': Unnamed: 0 Unnamed: 1 \\\n",
+ " 0 NaN 0 \n",
+ " 1 NaN 1 \n",
+ " 2 NaN 2 \n",
+ " 3 NaN 3 \n",
+ " 4 NaN 4 \n",
+ " 5 NaN 5 \n",
+ " 6 NaN 11 \n",
+ " 7 NaN 12 \n",
+ " 8 NaN 13 \n",
+ " 9 NaN 14 \n",
+ " 10 NaN 15 \n",
+ " 11 NaN 16 \n",
+ " 12 NaN 17 \n",
+ " 13 NaN 21 \n",
+ " 14 NaN 22 \n",
+ " 15 NaN 23 \n",
+ " 16 NaN 24 \n",
+ " 17 NaN 25 \n",
+ " 18 NaN 26 \n",
+ " 19 NaN 27 \n",
+ " 20 NaN 28 \n",
+ " 21 NaN 29 \n",
+ " 22 NaN 31 \n",
+ " 23 NaN 32 \n",
+ " 24 NaN 33 \n",
+ " 25 NaN 35 \n",
+ " 26 NaN 41 \n",
+ " 27 NaN 42 \n",
+ " 28 NaN 43 \n",
+ " 29 NaN 50 \n",
+ " 30 NaN 51 \n",
+ " 31 NaN 52 \n",
+ " 32 NaN 53 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN Notas: Foram consideradas a rede total (escola... \n",
+ " \n",
+ " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n",
+ " 0 Brasil 36.6 49.5 \n",
+ " 1 Norte 36.8 57.7 \n",
+ " 2 Nordeste 34.2 56.9 \n",
+ " 3 Sudeste 36.2 41.1 \n",
+ " 4 Sul 43.1 51.1 \n",
+ " 5 Centro-Oeste 33.6 43.3 \n",
+ " 6 Rondônia 23.4 43.6 \n",
+ " 7 Acre 27.9 46.2 \n",
+ " 8 Amazonas 39.8 56.7 \n",
+ " 9 Roraima 20.5 35.8 \n",
+ " 10 Pará 43.4 68.1 \n",
+ " 11 Amapá 34.0 58.8 \n",
+ " 12 Tocantins 27.0 48.7 \n",
+ " 13 Maranhão 35.3 56.5 \n",
+ " 14 Piauí 33.2 56.3 \n",
+ " 15 Ceará 14.6 33.3 \n",
+ " 16 Rio Grande do Norte 23.6 60.1 \n",
+ " 17 Paraíba 42.4 65.7 \n",
+ " 18 Pernambuco 36.4 60.8 \n",
+ " 19 Alagoas 32.2 57.4 \n",
+ " 20 Sergipe 44.5 72.2 \n",
+ " 21 Bahia 42.9 66.6 \n",
+ " 22 Minas Gerais 25.0 40.4 \n",
+ " 23 Espírito Santo 20.4 47.6 \n",
+ " 24 Rio de Janeiro 49.3 63.4 \n",
+ " 25 São Paulo 38.8 32.6 \n",
+ " 26 Paraná 60.2 46.1 \n",
+ " 27 Santa Catarina 14.6 35.1 \n",
+ " 28 Rio Grande do Sul 37.6 61.6 \n",
+ " 29 Mato Grosso do Sul 56.0 63.4 \n",
+ " 30 Mato Grosso 21.1 32.3 \n",
+ " 31 Goiás 25.2 39.8 \n",
+ " 32 Distrito Federal 36.0 48.9 \n",
+ " 33 NaN NaN NaN \n",
+ " 34 NaN NaN NaN \n",
+ " \n",
+ " Distorção Idade-Série.2 \n",
+ " 0 48.3 \n",
+ " 1 57.5 \n",
+ " 2 57.8 \n",
+ " 3 41.8 \n",
+ " 4 44.9 \n",
+ " 5 46.1 \n",
+ " 6 48.7 \n",
+ " 7 44.4 \n",
+ " 8 55.9 \n",
+ " 9 43.7 \n",
+ " 10 66.7 \n",
+ " 11 54.5 \n",
+ " 12 52.9 \n",
+ " 13 50.8 \n",
+ " 14 55.3 \n",
+ " 15 44.4 \n",
+ " 16 59.9 \n",
+ " 17 61.8 \n",
+ " 18 61.3 \n",
+ " 19 58.9 \n",
+ " 20 68.7 \n",
+ " 21 68.6 \n",
+ " 22 45.1 \n",
+ " 23 47.3 \n",
+ " 24 57.8 \n",
+ " 25 34.9 \n",
+ " 26 40.6 \n",
+ " 27 37.1 \n",
+ " 28 53.2 \n",
+ " 29 57.9 \n",
+ " 30 40.7 \n",
+ " 31 46.5 \n",
+ " 32 43.6 \n",
+ " 33 NaN \n",
+ " 34 NaN }"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: 2007\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2008\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2009\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2010\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2011\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2012\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2013\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2014\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2015\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2016\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2017\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2018\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2019\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2020\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2021\n",
+ "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n",
+ " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "for sheet_name, df in dfs.items():\n",
+ " print(f\"Sheet: {sheet_name}\")\n",
+ " print(df.columns) # This will print the column names of each DataFrame\n",
+ " print() # Adds a blank line for readability\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataframes = {}\n",
+ "\n",
+ "for table_name, columns in dfs.items():\n",
+ " df = pd.DataFrame(columns) # Create DataFrame for each table\n",
+ " dataframes[table_name] = df # Store the DataFrame in a dictionary\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "RENAME_COLUMNS = {\n",
+ " 'Unnamed: 1':'id_uf',\n",
+ " 'Unnamed: 2':'nome',\n",
+ " 'Distorção Idade-Série': 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Distorção Idade-Série.1':'Ensino Fundamental – Anos Finais',\n",
+ " 'Distorção Idade-Série.2':'Ensino Médio Regular' \n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ " cols_drop = [\n",
+ " col\n",
+ " for col in df.columns\n",
+ " if col.startswith(\"Unnamed\")\n",
+ " ]\n",
+ "\n",
+ " return df.drop(columns=cols_drop)\n",
+ "\n",
+ "dfs = {\n",
+ " name: drop_unused_columns(\n",
+ " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n",
+ " )\n",
+ " for name, df in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'2007': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 72.2 60.6 \n",
+ " 1 76.1 69.7 \n",
+ " 2 74.4 64.7 \n",
+ " 3 69.3 56.5 \n",
+ " 4 73.3 59.6 \n",
+ " 5 78.7 73.6 \n",
+ " 6 72.8 65.8 \n",
+ " 7 67.2 60.0 \n",
+ " 8 76.3 75.7 \n",
+ " 9 71.0 69.5 \n",
+ " 10 78.2 68.4 \n",
+ " 11 76.4 81.0 \n",
+ " 12 75.4 70.7 \n",
+ " 13 74.2 61.2 \n",
+ " 14 83.0 69.3 \n",
+ " 15 67.8 56.9 \n",
+ " 16 66.4 74.0 \n",
+ " 17 73.3 71.6 \n",
+ " 18 78.6 64.5 \n",
+ " 19 72.0 79.2 \n",
+ " 20 85.0 85.4 \n",
+ " 21 76.7 66.5 \n",
+ " 22 79.0 69.1 \n",
+ " 23 67.5 69.0 \n",
+ " 24 85.1 77.2 \n",
+ " 25 60.7 50.2 \n",
+ " 26 72.3 54.2 \n",
+ " 27 75.8 65.3 \n",
+ " 28 73.6 66.1 \n",
+ " 29 86.0 81.2 \n",
+ " 30 76.3 66.2 \n",
+ " 31 76.8 75.7 \n",
+ " 32 74.6 70.6 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 65.3 \n",
+ " 1 75.7 \n",
+ " 2 77.9 \n",
+ " 3 58.4 \n",
+ " 4 62.6 \n",
+ " 5 73.6 \n",
+ " 6 76.2 \n",
+ " 7 60.5 \n",
+ " 8 75.5 \n",
+ " 9 76.9 \n",
+ " 10 77.7 \n",
+ " 11 69.4 \n",
+ " 12 75.9 \n",
+ " 13 76.2 \n",
+ " 14 80.5 \n",
+ " 15 83.7 \n",
+ " 16 78.0 \n",
+ " 17 88.0 \n",
+ " 18 81.3 \n",
+ " 19 88.3 \n",
+ " 20 85.7 \n",
+ " 21 67.1 \n",
+ " 22 69.1 \n",
+ " 23 69.1 \n",
+ " 24 86.7 \n",
+ " 25 51.5 \n",
+ " 26 59.5 \n",
+ " 27 71.9 \n",
+ " 28 60.4 \n",
+ " 29 75.4 \n",
+ " 30 78.1 \n",
+ " 31 75.5 \n",
+ " 32 69.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2008': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 66.5 54.5 \n",
+ " 1 61.2 44.8 \n",
+ " 2 60.7 45.7 \n",
+ " 3 65.7 53.5 \n",
+ " 4 73.5 63.2 \n",
+ " 5 72.3 63.6 \n",
+ " 6 55.1 35.9 \n",
+ " 7 43.8 29.7 \n",
+ " 8 58.0 57.0 \n",
+ " 9 44.2 28.4 \n",
+ " 10 65.1 48.3 \n",
+ " 11 57.0 54.4 \n",
+ " 12 67.1 46.3 \n",
+ " 13 62.8 44.3 \n",
+ " 14 66.3 57.9 \n",
+ " 15 55.9 38.6 \n",
+ " 16 47.1 45.2 \n",
+ " 17 54.0 48.1 \n",
+ " 18 73.5 58.6 \n",
+ " 19 49.0 46.1 \n",
+ " 20 68.5 58.9 \n",
+ " 21 60.5 44.4 \n",
+ " 22 73.1 61.8 \n",
+ " 23 62.9 61.2 \n",
+ " 24 82.7 72.7 \n",
+ " 25 57.5 48.2 \n",
+ " 26 78.0 60.6 \n",
+ " 27 69.5 64.5 \n",
+ " 28 70.1 65.0 \n",
+ " 29 84.6 75.7 \n",
+ " 30 72.6 66.2 \n",
+ " 31 68.0 62.1 \n",
+ " 32 62.0 60.0 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 57.3 \n",
+ " 1 50.7 \n",
+ " 2 49.4 \n",
+ " 3 56.6 \n",
+ " 4 65.0 \n",
+ " 5 60.5 \n",
+ " 6 48.8 \n",
+ " 7 47.2 \n",
+ " 8 62.2 \n",
+ " 9 41.2 \n",
+ " 10 48.7 \n",
+ " 11 60.0 \n",
+ " 12 48.2 \n",
+ " 13 45.3 \n",
+ " 14 51.0 \n",
+ " 15 48.7 \n",
+ " 16 44.2 \n",
+ " 17 52.3 \n",
+ " 18 65.1 \n",
+ " 19 61.1 \n",
+ " 20 65.7 \n",
+ " 21 41.9 \n",
+ " 22 58.6 \n",
+ " 23 73.5 \n",
+ " 24 80.2 \n",
+ " 25 51.5 \n",
+ " 26 59.6 \n",
+ " 27 72.5 \n",
+ " 28 64.0 \n",
+ " 29 80.0 \n",
+ " 30 72.7 \n",
+ " 31 55.8 \n",
+ " 32 60.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2009': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 68.4 54.8 \n",
+ " 1 69.8 51.0 \n",
+ " 2 66.9 50.7 \n",
+ " 3 65.6 52.1 \n",
+ " 4 75.8 63.8 \n",
+ " 5 73.1 65.6 \n",
+ " 6 64.2 47.3 \n",
+ " 7 51.6 37.8 \n",
+ " 8 70.6 56.7 \n",
+ " 9 50.6 51.7 \n",
+ " 10 74.2 54.0 \n",
+ " 11 64.5 49.2 \n",
+ " 12 72.8 50.5 \n",
+ " 13 64.4 48.8 \n",
+ " 14 70.6 47.2 \n",
+ " 15 64.3 44.5 \n",
+ " 16 53.4 57.9 \n",
+ " 17 70.4 47.9 \n",
+ " 18 78.3 64.5 \n",
+ " 19 54.1 58.9 \n",
+ " 20 78.2 61.1 \n",
+ " 21 65.5 50.2 \n",
+ " 22 75.4 64.4 \n",
+ " 23 69.9 60.2 \n",
+ " 24 84.6 74.3 \n",
+ " 25 57.0 47.3 \n",
+ " 26 80.6 61.1 \n",
+ " 27 57.4 61.8 \n",
+ " 28 75.0 67.9 \n",
+ " 29 84.1 78.9 \n",
+ " 30 73.2 69.7 \n",
+ " 31 65.7 61.2 \n",
+ " 32 66.4 64.6 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.0 \n",
+ " 1 57.5 \n",
+ " 2 56.6 \n",
+ " 3 52.1 \n",
+ " 4 62.1 \n",
+ " 5 62.2 \n",
+ " 6 45.1 \n",
+ " 7 43.4 \n",
+ " 8 61.4 \n",
+ " 9 21.4 \n",
+ " 10 62.1 \n",
+ " 11 54.7 \n",
+ " 12 55.2 \n",
+ " 13 48.2 \n",
+ " 14 61.4 \n",
+ " 15 52.6 \n",
+ " 16 51.1 \n",
+ " 17 50.2 \n",
+ " 18 69.3 \n",
+ " 19 64.1 \n",
+ " 20 68.9 \n",
+ " 21 54.4 \n",
+ " 22 59.9 \n",
+ " 23 69.9 \n",
+ " 24 74.8 \n",
+ " 25 46.0 \n",
+ " 26 57.4 \n",
+ " 27 57.4 \n",
+ " 28 70.3 \n",
+ " 29 80.6 \n",
+ " 30 70.9 \n",
+ " 31 56.2 \n",
+ " 32 67.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2010': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 67.5 57.2 \n",
+ " 1 70.0 59.4 \n",
+ " 2 67.1 57.8 \n",
+ " 3 65.0 52.8 \n",
+ " 4 72.4 62.8 \n",
+ " 5 68.6 67.2 \n",
+ " 6 67.5 60.7 \n",
+ " 7 57.2 49.4 \n",
+ " 8 70.7 64.2 \n",
+ " 9 50.6 59.2 \n",
+ " 10 74.9 62.5 \n",
+ " 11 66.7 54.7 \n",
+ " 12 68.6 57.5 \n",
+ " 13 65.2 55.5 \n",
+ " 14 69.2 56.1 \n",
+ " 15 64.2 50.7 \n",
+ " 16 56.2 66.8 \n",
+ " 17 71.7 56.5 \n",
+ " 18 74.9 65.6 \n",
+ " 19 57.6 60.2 \n",
+ " 20 78.2 72.2 \n",
+ " 21 67.4 59.3 \n",
+ " 22 69.3 66.1 \n",
+ " 23 59.1 59.0 \n",
+ " 24 82.2 73.3 \n",
+ " 25 58.4 47.4 \n",
+ " 26 76.0 59.0 \n",
+ " 27 54.1 62.1 \n",
+ " 28 73.0 67.8 \n",
+ " 29 81.2 78.7 \n",
+ " 30 64.6 66.7 \n",
+ " 31 62.2 66.2 \n",
+ " 32 62.7 61.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.4 \n",
+ " 1 63.0 \n",
+ " 2 60.3 \n",
+ " 3 52.1 \n",
+ " 4 55.6 \n",
+ " 5 64.6 \n",
+ " 6 45.9 \n",
+ " 7 56.6 \n",
+ " 8 70.9 \n",
+ " 9 58.6 \n",
+ " 10 70.8 \n",
+ " 11 54.1 \n",
+ " 12 56.0 \n",
+ " 13 56.7 \n",
+ " 14 65.9 \n",
+ " 15 50.6 \n",
+ " 16 67.4 \n",
+ " 17 55.6 \n",
+ " 18 69.9 \n",
+ " 19 69.1 \n",
+ " 20 80.1 \n",
+ " 21 57.6 \n",
+ " 22 58.9 \n",
+ " 23 60.1 \n",
+ " 24 67.7 \n",
+ " 25 47.3 \n",
+ " 26 51.1 \n",
+ " 27 53.0 \n",
+ " 28 62.9 \n",
+ " 29 75.6 \n",
+ " 30 69.6 \n",
+ " 31 63.2 \n",
+ " 32 61.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2011': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 66.5 58.5 \n",
+ " 1 69.1 63.2 \n",
+ " 2 66.9 61.9 \n",
+ " 3 64.7 53.6 \n",
+ " 4 69.1 60.6 \n",
+ " 5 66.1 66.2 \n",
+ " 6 67.5 61.4 \n",
+ " 7 57.1 53.4 \n",
+ " 8 69.7 69.5 \n",
+ " 9 49.4 65.6 \n",
+ " 10 73.4 66.5 \n",
+ " 11 69.8 55.4 \n",
+ " 12 67.2 62.8 \n",
+ " 13 64.8 60.2 \n",
+ " 14 67.1 61.0 \n",
+ " 15 62.1 56.8 \n",
+ " 16 57.6 68.9 \n",
+ " 17 71.2 60.8 \n",
+ " 18 74.3 67.4 \n",
+ " 19 60.4 64.5 \n",
+ " 20 78.4 73.3 \n",
+ " 21 67.7 61.6 \n",
+ " 22 65.1 65.3 \n",
+ " 23 54.4 58.0 \n",
+ " 24 79.6 72.8 \n",
+ " 25 60.4 47.5 \n",
+ " 26 73.5 56.1 \n",
+ " 27 50.3 60.1 \n",
+ " 28 69.8 66.4 \n",
+ " 29 78.4 79.3 \n",
+ " 30 58.8 62.5 \n",
+ " 31 60.5 65.1 \n",
+ " 32 63.0 61.0 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 56.1 \n",
+ " 1 62.9 \n",
+ " 2 59.9 \n",
+ " 3 52.5 \n",
+ " 4 54.3 \n",
+ " 5 62.5 \n",
+ " 6 51.0 \n",
+ " 7 51.2 \n",
+ " 8 68.9 \n",
+ " 9 48.1 \n",
+ " 10 69.1 \n",
+ " 11 57.7 \n",
+ " 12 62.9 \n",
+ " 13 54.6 \n",
+ " 14 63.0 \n",
+ " 15 57.7 \n",
+ " 16 68.8 \n",
+ " 17 50.5 \n",
+ " 18 65.9 \n",
+ " 19 62.8 \n",
+ " 20 73.5 \n",
+ " 21 57.1 \n",
+ " 22 59.8 \n",
+ " 23 59.1 \n",
+ " 24 66.3 \n",
+ " 25 47.4 \n",
+ " 26 50.7 \n",
+ " 27 51.3 \n",
+ " 28 61.2 \n",
+ " 29 68.9 \n",
+ " 30 64.6 \n",
+ " 31 63.8 \n",
+ " 32 57.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2012': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 64.4 58.9 \n",
+ " 1 67.0 64.7 \n",
+ " 2 66.2 63.7 \n",
+ " 3 62.0 53.1 \n",
+ " 4 67.1 61.4 \n",
+ " 5 62.5 64.3 \n",
+ " 6 67.0 60.8 \n",
+ " 7 54.5 58.5 \n",
+ " 8 66.0 72.1 \n",
+ " 9 43.5 62.5 \n",
+ " 10 70.2 67.7 \n",
+ " 11 68.3 61.9 \n",
+ " 12 68.3 62.9 \n",
+ " 13 62.6 60.3 \n",
+ " 14 68.5 61.3 \n",
+ " 15 57.8 59.4 \n",
+ " 16 57.6 69.9 \n",
+ " 17 69.3 61.9 \n",
+ " 18 74.2 69.6 \n",
+ " 19 61.7 66.2 \n",
+ " 20 79.5 75.4 \n",
+ " 21 68.0 64.0 \n",
+ " 22 57.8 63.3 \n",
+ " 23 50.4 59.4 \n",
+ " 24 78.8 71.8 \n",
+ " 25 59.6 46.6 \n",
+ " 26 73.8 56.9 \n",
+ " 27 41.5 59.9 \n",
+ " 28 67.0 67.6 \n",
+ " 29 77.2 78.6 \n",
+ " 30 47.1 57.6 \n",
+ " 31 56.6 63.0 \n",
+ " 32 62.6 59.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 55.3 \n",
+ " 1 62.9 \n",
+ " 2 60.9 \n",
+ " 3 51.4 \n",
+ " 4 51.8 \n",
+ " 5 61.5 \n",
+ " 6 53.9 \n",
+ " 7 52.0 \n",
+ " 8 66.0 \n",
+ " 9 57.5 \n",
+ " 10 70.9 \n",
+ " 11 52.5 \n",
+ " 12 61.0 \n",
+ " 13 57.8 \n",
+ " 14 58.3 \n",
+ " 15 59.0 \n",
+ " 16 67.7 \n",
+ " 17 52.1 \n",
+ " 18 67.1 \n",
+ " 19 63.5 \n",
+ " 20 73.7 \n",
+ " 21 59.8 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 60.5 \n",
+ " 25 46.6 \n",
+ " 26 46.9 \n",
+ " 27 49.9 \n",
+ " 28 59.4 \n",
+ " 29 69.8 \n",
+ " 30 57.5 \n",
+ " 31 64.7 \n",
+ " 32 53.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2013': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 61.8 59.3 \n",
+ " 1 65.1 65.2 \n",
+ " 2 63.8 66.2 \n",
+ " 3 59.5 52.2 \n",
+ " 4 63.0 63.8 \n",
+ " 5 60.1 61.6 \n",
+ " 6 66.3 62.2 \n",
+ " 7 55.8 56.4 \n",
+ " 8 63.2 66.0 \n",
+ " 9 38.4 58.2 \n",
+ " 10 69.1 69.9 \n",
+ " 11 64.4 60.6 \n",
+ " 12 63.3 66.0 \n",
+ " 13 60.5 61.9 \n",
+ " 14 66.0 66.3 \n",
+ " 15 50.2 63.1 \n",
+ " 16 56.4 72.8 \n",
+ " 17 68.0 67.2 \n",
+ " 18 73.1 71.1 \n",
+ " 19 61.3 65.7 \n",
+ " 20 76.4 77.6 \n",
+ " 21 66.8 65.8 \n",
+ " 22 52.4 60.5 \n",
+ " 23 47.8 61.8 \n",
+ " 24 77.1 74.3 \n",
+ " 25 58.4 44.8 \n",
+ " 26 70.7 59.6 \n",
+ " 27 35.1 59.7 \n",
+ " 28 62.8 70.3 \n",
+ " 29 76.4 75.2 \n",
+ " 30 41.9 47.7 \n",
+ " 31 53.5 64.7 \n",
+ " 32 63.5 58.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 55.4 \n",
+ " 1 62.0 \n",
+ " 2 61.9 \n",
+ " 3 51.0 \n",
+ " 4 52.9 \n",
+ " 5 61.4 \n",
+ " 6 54.1 \n",
+ " 7 48.6 \n",
+ " 8 63.9 \n",
+ " 9 60.3 \n",
+ " 10 70.6 \n",
+ " 11 54.4 \n",
+ " 12 60.4 \n",
+ " 13 57.8 \n",
+ " 14 63.2 \n",
+ " 15 60.9 \n",
+ " 16 65.5 \n",
+ " 17 56.9 \n",
+ " 18 71.0 \n",
+ " 19 55.5 \n",
+ " 20 72.9 \n",
+ " 21 60.0 \n",
+ " 22 61.1 \n",
+ " 23 51.7 \n",
+ " 24 63.1 \n",
+ " 25 46.1 \n",
+ " 26 48.2 \n",
+ " 27 51.8 \n",
+ " 28 59.4 \n",
+ " 29 67.8 \n",
+ " 30 60.5 \n",
+ " 31 65.1 \n",
+ " 32 53.9 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2014': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 58.7 59.6 \n",
+ " 1 60.7 65.6 \n",
+ " 2 60.4 68.1 \n",
+ " 3 56.4 51.9 \n",
+ " 4 61.2 64.5 \n",
+ " 5 56.0 58.6 \n",
+ " 6 57.7 63.7 \n",
+ " 7 53.6 55.5 \n",
+ " 8 61.6 67.2 \n",
+ " 9 35.5 54.8 \n",
+ " 10 64.8 72.0 \n",
+ " 11 60.4 60.3 \n",
+ " 12 56.9 65.2 \n",
+ " 13 59.0 63.6 \n",
+ " 14 62.1 68.2 \n",
+ " 15 42.5 63.9 \n",
+ " 16 52.5 74.8 \n",
+ " 17 65.0 69.4 \n",
+ " 18 71.2 72.7 \n",
+ " 19 60.1 69.4 \n",
+ " 20 71.9 81.3 \n",
+ " 21 64.4 67.9 \n",
+ " 22 46.0 56.6 \n",
+ " 23 45.6 63.0 \n",
+ " 24 75.3 73.7 \n",
+ " 25 56.1 44.9 \n",
+ " 26 73.2 61.2 \n",
+ " 27 30.0 57.5 \n",
+ " 28 58.9 70.9 \n",
+ " 29 75.1 73.4 \n",
+ " 30 35.0 37.8 \n",
+ " 31 46.7 62.0 \n",
+ " 32 62.9 61.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 54.5 \n",
+ " 1 58.9 \n",
+ " 2 64.7 \n",
+ " 3 49.3 \n",
+ " 4 52.4 \n",
+ " 5 61.0 \n",
+ " 6 52.9 \n",
+ " 7 49.4 \n",
+ " 8 67.0 \n",
+ " 9 64.4 \n",
+ " 10 63.8 \n",
+ " 11 57.8 \n",
+ " 12 60.8 \n",
+ " 13 61.1 \n",
+ " 14 63.6 \n",
+ " 15 62.2 \n",
+ " 16 69.0 \n",
+ " 17 60.3 \n",
+ " 18 68.6 \n",
+ " 19 61.3 \n",
+ " 20 74.1 \n",
+ " 21 67.8 \n",
+ " 22 60.5 \n",
+ " 23 53.9 \n",
+ " 24 63.1 \n",
+ " 25 43.5 \n",
+ " 26 48.4 \n",
+ " 27 50.4 \n",
+ " 28 59.0 \n",
+ " 29 66.4 \n",
+ " 30 58.7 \n",
+ " 31 64.4 \n",
+ " 32 54.2 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2015': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 55.8 59.2 \n",
+ " 1 57.8 66.0 \n",
+ " 2 57.2 67.6 \n",
+ " 3 53.8 51.1 \n",
+ " 4 57.6 63.6 \n",
+ " 5 53.3 56.2 \n",
+ " 6 53.0 62.4 \n",
+ " 7 50.5 54.1 \n",
+ " 8 59.2 65.1 \n",
+ " 9 32.1 54.7 \n",
+ " 10 62.8 73.7 \n",
+ " 11 55.7 61.7 \n",
+ " 12 53.1 64.9 \n",
+ " 13 55.3 63.7 \n",
+ " 14 60.1 67.5 \n",
+ " 15 36.3 60.4 \n",
+ " 16 49.0 74.2 \n",
+ " 17 62.9 70.4 \n",
+ " 18 69.3 73.1 \n",
+ " 19 58.1 69.9 \n",
+ " 20 68.2 81.0 \n",
+ " 21 62.7 68.6 \n",
+ " 22 41.7 53.1 \n",
+ " 23 43.0 62.6 \n",
+ " 24 72.9 72.3 \n",
+ " 25 54.5 44.4 \n",
+ " 26 69.7 61.2 \n",
+ " 27 26.5 54.1 \n",
+ " 28 56.0 70.5 \n",
+ " 29 73.1 69.4 \n",
+ " 30 30.8 34.0 \n",
+ " 31 42.7 60.6 \n",
+ " 32 62.1 59.7 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.7 \n",
+ " 1 58.4 \n",
+ " 2 64.4 \n",
+ " 3 48.3 \n",
+ " 4 52.0 \n",
+ " 5 58.3 \n",
+ " 6 53.3 \n",
+ " 7 49.6 \n",
+ " 8 68.1 \n",
+ " 9 54.5 \n",
+ " 10 65.6 \n",
+ " 11 55.5 \n",
+ " 12 58.9 \n",
+ " 13 61.0 \n",
+ " 14 65.6 \n",
+ " 15 62.6 \n",
+ " 16 69.1 \n",
+ " 17 60.4 \n",
+ " 18 66.0 \n",
+ " 19 57.8 \n",
+ " 20 73.7 \n",
+ " 21 67.6 \n",
+ " 22 60.9 \n",
+ " 23 54.2 \n",
+ " 24 62.0 \n",
+ " 25 41.7 \n",
+ " 26 48.0 \n",
+ " 27 50.1 \n",
+ " 28 58.0 \n",
+ " 29 67.2 \n",
+ " 30 52.3 \n",
+ " 31 61.6 \n",
+ " 32 52.7 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2016': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 53.5 58.2 \n",
+ " 1 55.7 65.6 \n",
+ " 2 54.3 66.9 \n",
+ " 3 51.4 49.7 \n",
+ " 4 56.9 61.2 \n",
+ " 5 51.0 55.3 \n",
+ " 6 47.2 62.7 \n",
+ " 7 46.8 55.5 \n",
+ " 8 57.7 63.3 \n",
+ " 9 28.9 52.9 \n",
+ " 10 61.3 73.2 \n",
+ " 11 54.4 62.0 \n",
+ " 12 52.5 62.7 \n",
+ " 13 53.1 64.2 \n",
+ " 14 57.3 67.5 \n",
+ " 15 30.9 55.6 \n",
+ " 16 45.4 72.9 \n",
+ " 17 60.3 71.4 \n",
+ " 18 65.8 74.0 \n",
+ " 19 56.1 70.7 \n",
+ " 20 66.1 79.4 \n",
+ " 21 61.4 69.9 \n",
+ " 22 39.3 50.9 \n",
+ " 23 40.7 59.8 \n",
+ " 24 70.1 70.2 \n",
+ " 25 51.9 43.3 \n",
+ " 26 69.5 59.3 \n",
+ " 27 24.5 47.9 \n",
+ " 28 54.9 68.4 \n",
+ " 29 71.9 67.3 \n",
+ " 30 30.0 31.1 \n",
+ " 31 40.0 60.1 \n",
+ " 32 59.7 59.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.9 \n",
+ " 1 60.4 \n",
+ " 2 64.6 \n",
+ " 3 47.8 \n",
+ " 4 53.6 \n",
+ " 5 57.3 \n",
+ " 6 54.4 \n",
+ " 7 51.1 \n",
+ " 8 67.3 \n",
+ " 9 51.1 \n",
+ " 10 69.7 \n",
+ " 11 55.5 \n",
+ " 12 60.9 \n",
+ " 13 58.7 \n",
+ " 14 63.5 \n",
+ " 15 64.1 \n",
+ " 16 68.3 \n",
+ " 17 65.5 \n",
+ " 18 65.4 \n",
+ " 19 64.6 \n",
+ " 20 72.0 \n",
+ " 21 68.0 \n",
+ " 22 59.8 \n",
+ " 23 56.2 \n",
+ " 24 58.3 \n",
+ " 25 41.2 \n",
+ " 26 49.1 \n",
+ " 27 50.7 \n",
+ " 28 61.0 \n",
+ " 29 68.0 \n",
+ " 30 49.8 \n",
+ " 31 61.6 \n",
+ " 32 50.8 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2017': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 51.4 56.8 \n",
+ " 1 53.1 64.8 \n",
+ " 2 52.2 65.3 \n",
+ " 3 49.4 48.3 \n",
+ " 4 54.6 59.2 \n",
+ " 5 48.2 52.5 \n",
+ " 6 42.7 60.5 \n",
+ " 7 45.1 54.0 \n",
+ " 8 54.5 62.9 \n",
+ " 9 26.2 50.3 \n",
+ " 10 59.9 72.7 \n",
+ " 11 53.8 60.4 \n",
+ " 12 44.1 61.6 \n",
+ " 13 50.5 63.3 \n",
+ " 14 54.4 63.7 \n",
+ " 15 28.4 50.8 \n",
+ " 16 43.2 69.6 \n",
+ " 17 59.1 71.3 \n",
+ " 18 62.6 73.2 \n",
+ " 19 55.1 69.8 \n",
+ " 20 62.3 78.4 \n",
+ " 21 59.6 70.4 \n",
+ " 22 36.8 49.4 \n",
+ " 23 37.9 58.2 \n",
+ " 24 68.0 67.8 \n",
+ " 25 50.8 41.2 \n",
+ " 26 66.0 58.0 \n",
+ " 27 23.0 44.4 \n",
+ " 28 53.4 66.9 \n",
+ " 29 71.5 68.3 \n",
+ " 30 29.3 33.3 \n",
+ " 31 37.3 54.6 \n",
+ " 32 56.8 56.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.1 \n",
+ " 1 59.9 \n",
+ " 2 61.8 \n",
+ " 3 46.7 \n",
+ " 4 54.9 \n",
+ " 5 55.5 \n",
+ " 6 52.2 \n",
+ " 7 48.1 \n",
+ " 8 64.1 \n",
+ " 9 51.2 \n",
+ " 10 69.8 \n",
+ " 11 54.6 \n",
+ " 12 59.5 \n",
+ " 13 56.2 \n",
+ " 14 59.9 \n",
+ " 15 60.8 \n",
+ " 16 67.9 \n",
+ " 17 66.9 \n",
+ " 18 65.2 \n",
+ " 19 52.6 \n",
+ " 20 75.2 \n",
+ " 21 65.7 \n",
+ " 22 57.4 \n",
+ " 23 51.5 \n",
+ " 24 58.6 \n",
+ " 25 39.6 \n",
+ " 26 51.2 \n",
+ " 27 50.5 \n",
+ " 28 62.0 \n",
+ " 29 65.6 \n",
+ " 30 46.4 \n",
+ " 31 60.5 \n",
+ " 32 50.5 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2018': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 48.4 55.0 \n",
+ " 1 49.9 62.6 \n",
+ " 2 48.9 63.4 \n",
+ " 3 46.6 46.8 \n",
+ " 4 52.5 57.0 \n",
+ " 5 44.1 49.8 \n",
+ " 6 36.6 58.4 \n",
+ " 7 41.4 51.5 \n",
+ " 8 51.6 60.7 \n",
+ " 9 25.2 45.0 \n",
+ " 10 57.7 70.7 \n",
+ " 11 51.3 60.2 \n",
+ " 12 38.7 57.7 \n",
+ " 13 47.7 61.2 \n",
+ " 14 50.9 62.2 \n",
+ " 15 24.6 46.0 \n",
+ " 16 41.0 67.5 \n",
+ " 17 57.0 70.7 \n",
+ " 18 57.0 70.8 \n",
+ " 19 50.1 68.2 \n",
+ " 20 60.8 76.4 \n",
+ " 21 56.8 69.9 \n",
+ " 22 34.0 48.1 \n",
+ " 23 34.5 55.4 \n",
+ " 24 64.7 66.6 \n",
+ " 25 48.3 38.8 \n",
+ " 26 63.0 55.2 \n",
+ " 27 21.7 41.4 \n",
+ " 28 51.9 65.4 \n",
+ " 29 67.4 67.6 \n",
+ " 30 27.9 34.6 \n",
+ " 31 32.9 50.0 \n",
+ " 32 54.3 53.8 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 53.2 \n",
+ " 1 61.0 \n",
+ " 2 61.2 \n",
+ " 3 47.0 \n",
+ " 4 54.1 \n",
+ " 5 53.2 \n",
+ " 6 51.2 \n",
+ " 7 48.4 \n",
+ " 8 61.4 \n",
+ " 9 52.5 \n",
+ " 10 70.6 \n",
+ " 11 56.1 \n",
+ " 12 58.8 \n",
+ " 13 56.3 \n",
+ " 14 55.7 \n",
+ " 15 57.4 \n",
+ " 16 67.7 \n",
+ " 17 66.1 \n",
+ " 18 63.2 \n",
+ " 19 60.0 \n",
+ " 20 75.8 \n",
+ " 21 63.9 \n",
+ " 22 55.4 \n",
+ " 23 52.4 \n",
+ " 24 60.3 \n",
+ " 25 38.8 \n",
+ " 26 49.9 \n",
+ " 27 47.4 \n",
+ " 28 63.0 \n",
+ " 29 62.6 \n",
+ " 30 43.8 \n",
+ " 31 58.2 \n",
+ " 32 47.6 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2019': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 44.9 53.3 \n",
+ " 1 46.8 60.9 \n",
+ " 2 45.1 61.3 \n",
+ " 3 42.6 45.2 \n",
+ " 4 50.1 55.0 \n",
+ " 5 41.1 48.3 \n",
+ " 6 30.9 52.9 \n",
+ " 7 36.9 49.6 \n",
+ " 8 49.0 59.5 \n",
+ " 9 24.8 43.4 \n",
+ " 10 55.1 69.7 \n",
+ " 11 46.4 59.8 \n",
+ " 12 35.5 55.8 \n",
+ " 13 44.7 59.9 \n",
+ " 14 45.6 62.2 \n",
+ " 15 21.6 40.5 \n",
+ " 16 37.6 64.8 \n",
+ " 17 53.8 70.6 \n",
+ " 18 51.2 68.4 \n",
+ " 19 45.0 65.0 \n",
+ " 20 57.4 75.1 \n",
+ " 21 53.5 69.5 \n",
+ " 22 30.7 45.6 \n",
+ " 23 30.8 53.4 \n",
+ " 24 60.4 65.9 \n",
+ " 25 43.9 37.5 \n",
+ " 26 61.6 51.5 \n",
+ " 27 20.0 38.7 \n",
+ " 28 49.1 64.2 \n",
+ " 29 65.0 66.2 \n",
+ " 30 27.5 33.9 \n",
+ " 31 30.2 47.7 \n",
+ " 32 44.8 50.5 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 51.1 \n",
+ " 1 59.3 \n",
+ " 2 59.4 \n",
+ " 3 44.7 \n",
+ " 4 50.9 \n",
+ " 5 51.4 \n",
+ " 6 48.3 \n",
+ " 7 46.7 \n",
+ " 8 62.3 \n",
+ " 9 47.7 \n",
+ " 10 68.5 \n",
+ " 11 51.7 \n",
+ " 12 56.8 \n",
+ " 13 54.8 \n",
+ " 14 55.8 \n",
+ " 15 52.0 \n",
+ " 16 66.1 \n",
+ " 17 64.5 \n",
+ " 18 65.5 \n",
+ " 19 59.3 \n",
+ " 20 74.6 \n",
+ " 21 63.1 \n",
+ " 22 50.7 \n",
+ " 23 50.2 \n",
+ " 24 59.5 \n",
+ " 25 36.9 \n",
+ " 26 45.2 \n",
+ " 27 43.0 \n",
+ " 28 59.4 \n",
+ " 29 59.2 \n",
+ " 30 43.9 \n",
+ " 31 56.0 \n",
+ " 32 45.1 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2020': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 41.7 51.6 \n",
+ " 1 44.0 58.8 \n",
+ " 2 40.9 59.3 \n",
+ " 3 39.3 43.8 \n",
+ " 4 48.2 53.7 \n",
+ " 5 38.1 45.4 \n",
+ " 6 29.2 47.8 \n",
+ " 7 34.9 47.4 \n",
+ " 8 45.9 57.8 \n",
+ " 9 25.2 37.0 \n",
+ " 10 52.0 68.6 \n",
+ " 11 41.1 59.7 \n",
+ " 12 32.2 52.9 \n",
+ " 13 41.2 58.6 \n",
+ " 14 39.8 58.2 \n",
+ " 15 18.4 37.1 \n",
+ " 16 33.4 62.5 \n",
+ " 17 50.3 68.4 \n",
+ " 18 43.5 65.2 \n",
+ " 19 39.7 61.2 \n",
+ " 20 53.9 74.8 \n",
+ " 21 50.6 68.8 \n",
+ " 22 28.2 44.7 \n",
+ " 23 27.9 51.2 \n",
+ " 24 55.4 64.5 \n",
+ " 25 40.7 35.4 \n",
+ " 26 61.5 49.7 \n",
+ " 27 18.7 37.4 \n",
+ " 28 46.0 63.8 \n",
+ " 29 61.7 65.3 \n",
+ " 30 24.5 35.0 \n",
+ " 31 29.2 42.5 \n",
+ " 32 39.9 49.1 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 50.3 \n",
+ " 1 58.5 \n",
+ " 2 58.5 \n",
+ " 3 44.2 \n",
+ " 4 48.8 \n",
+ " 5 49.4 \n",
+ " 6 49.1 \n",
+ " 7 46.7 \n",
+ " 8 58.6 \n",
+ " 9 48.3 \n",
+ " 10 67.6 \n",
+ " 11 52.2 \n",
+ " 12 54.8 \n",
+ " 13 52.4 \n",
+ " 14 55.8 \n",
+ " 15 49.2 \n",
+ " 16 62.7 \n",
+ " 17 62.1 \n",
+ " 18 63.7 \n",
+ " 19 58.8 \n",
+ " 20 71.3 \n",
+ " 21 64.9 \n",
+ " 22 49.6 \n",
+ " 23 49.4 \n",
+ " 24 60.4 \n",
+ " 25 35.7 \n",
+ " 26 44.3 \n",
+ " 27 40.0 \n",
+ " 28 58.0 \n",
+ " 29 59.3 \n",
+ " 30 42.7 \n",
+ " 31 51.9 \n",
+ " 32 44.3 \n",
+ " 33 NaN \n",
+ " 34 NaN ,\n",
+ " '2021': id_uf nome \\\n",
+ " 0 0 Brasil \n",
+ " 1 1 Norte \n",
+ " 2 2 Nordeste \n",
+ " 3 3 Sudeste \n",
+ " 4 4 Sul \n",
+ " 5 5 Centro-Oeste \n",
+ " 6 11 Rondônia \n",
+ " 7 12 Acre \n",
+ " 8 13 Amazonas \n",
+ " 9 14 Roraima \n",
+ " 10 15 Pará \n",
+ " 11 16 Amapá \n",
+ " 12 17 Tocantins \n",
+ " 13 21 Maranhão \n",
+ " 14 22 Piauí \n",
+ " 15 23 Ceará \n",
+ " 16 24 Rio Grande do Norte \n",
+ " 17 25 Paraíba \n",
+ " 18 26 Pernambuco \n",
+ " 19 27 Alagoas \n",
+ " 20 28 Sergipe \n",
+ " 21 29 Bahia \n",
+ " 22 31 Minas Gerais \n",
+ " 23 32 Espírito Santo \n",
+ " 24 33 Rio de Janeiro \n",
+ " 25 35 São Paulo \n",
+ " 26 41 Paraná \n",
+ " 27 42 Santa Catarina \n",
+ " 28 43 Rio Grande do Sul \n",
+ " 29 50 Mato Grosso do Sul \n",
+ " 30 51 Mato Grosso \n",
+ " 31 52 Goiás \n",
+ " 32 53 Distrito Federal \n",
+ " 33 NaN NaN \n",
+ " 34 Notas: Foram consideradas a rede total (escola... NaN \n",
+ " \n",
+ " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n",
+ " 0 36.6 49.5 \n",
+ " 1 36.8 57.7 \n",
+ " 2 34.2 56.9 \n",
+ " 3 36.2 41.1 \n",
+ " 4 43.1 51.1 \n",
+ " 5 33.6 43.3 \n",
+ " 6 23.4 43.6 \n",
+ " 7 27.9 46.2 \n",
+ " 8 39.8 56.7 \n",
+ " 9 20.5 35.8 \n",
+ " 10 43.4 68.1 \n",
+ " 11 34.0 58.8 \n",
+ " 12 27.0 48.7 \n",
+ " 13 35.3 56.5 \n",
+ " 14 33.2 56.3 \n",
+ " 15 14.6 33.3 \n",
+ " 16 23.6 60.1 \n",
+ " 17 42.4 65.7 \n",
+ " 18 36.4 60.8 \n",
+ " 19 32.2 57.4 \n",
+ " 20 44.5 72.2 \n",
+ " 21 42.9 66.6 \n",
+ " 22 25.0 40.4 \n",
+ " 23 20.4 47.6 \n",
+ " 24 49.3 63.4 \n",
+ " 25 38.8 32.6 \n",
+ " 26 60.2 46.1 \n",
+ " 27 14.6 35.1 \n",
+ " 28 37.6 61.6 \n",
+ " 29 56.0 63.4 \n",
+ " 30 21.1 32.3 \n",
+ " 31 25.2 39.8 \n",
+ " 32 36.0 48.9 \n",
+ " 33 NaN NaN \n",
+ " 34 NaN NaN \n",
+ " \n",
+ " Ensino Médio Regular \n",
+ " 0 48.3 \n",
+ " 1 57.5 \n",
+ " 2 57.8 \n",
+ " 3 41.8 \n",
+ " 4 44.9 \n",
+ " 5 46.1 \n",
+ " 6 48.7 \n",
+ " 7 44.4 \n",
+ " 8 55.9 \n",
+ " 9 43.7 \n",
+ " 10 66.7 \n",
+ " 11 54.5 \n",
+ " 12 52.9 \n",
+ " 13 50.8 \n",
+ " 14 55.3 \n",
+ " 15 44.4 \n",
+ " 16 59.9 \n",
+ " 17 61.8 \n",
+ " 18 61.3 \n",
+ " 19 58.9 \n",
+ " 20 68.7 \n",
+ " 21 68.6 \n",
+ " 22 45.1 \n",
+ " 23 47.3 \n",
+ " 24 57.8 \n",
+ " 25 34.9 \n",
+ " 26 40.6 \n",
+ " 27 37.1 \n",
+ " 28 53.2 \n",
+ " 29 57.9 \n",
+ " 30 40.7 \n",
+ " 31 46.5 \n",
+ " 32 43.6 \n",
+ " 33 NaN \n",
+ " 34 NaN }"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: 2007\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2008\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2009\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2010\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2011\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2012\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2013\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2014\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2015\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2016\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2017\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2018\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2019\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2020\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n",
+ "Sheet: 2021\n",
+ "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n",
+ " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print each sheet's name, then its column index, then a blank separator line.\n",
+ "for sheet_name, df in dfs.items():\n",
+ "    print(f\"Sheet: {sheet_name}\", df.columns, \"\", sep=\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _melt_sheet(sheet, ano):\n",
+ "    \"\"\"Reshape one sheet to long format, tagged with the key it is stored under.\n",
+ "\n",
+ "    Only rows whose `id_uf` is exactly two characters long (as text) are kept;\n",
+ "    every column other than `id_uf` and `nome` becomes an (`etapa_ensino`, `tdi`) pair.\n",
+ "    \"\"\"\n",
+ "    two_char_ids = sheet[\"id_uf\"].astype(str).str.len() == 2\n",
+ "    kept_rows = sheet.loc[two_char_ids]\n",
+ "    stage_columns = kept_rows.columns.difference([\"id_uf\", \"nome\"]).tolist()\n",
+ "    long_rows = pd.melt(\n",
+ "        kept_rows,\n",
+ "        id_vars=[\"id_uf\", \"nome\"],\n",
+ "        value_vars=stage_columns,\n",
+ "        var_name=\"etapa_ensino\",\n",
+ "        value_name=\"tdi\",\n",
+ "    )\n",
+ "    return long_rows.assign(ano=ano)\n",
+ "\n",
+ "\n",
+ "# Stack the long-format version of every sheet in `dfs`.\n",
+ "melted_dataframe = pd.concat(\n",
+ "    [_melt_sheet(sheet, ano) for ano, sheet in dfs.items()]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id_uf | \n",
+ " nome | \n",
+ " etapa_ensino | \n",
+ " tdi | \n",
+ " ano | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 11 | \n",
+ " Rondônia | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 65.8 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 12 | \n",
+ " Acre | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 60.0 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 13 | \n",
+ " Amazonas | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 75.7 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 14 | \n",
+ " Roraima | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 69.5 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 15 | \n",
+ " Pará | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 68.4 | \n",
+ " 2007 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " 43 | \n",
+ " Rio Grande do Sul | \n",
+ " Ensino Médio Regular | \n",
+ " 53.2 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " 50 | \n",
+ " Mato Grosso do Sul | \n",
+ " Ensino Médio Regular | \n",
+ " 57.9 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 51 | \n",
+ " Mato Grosso | \n",
+ " Ensino Médio Regular | \n",
+ " 40.7 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 52 | \n",
+ " Goiás | \n",
+ " Ensino Médio Regular | \n",
+ " 46.5 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 53 | \n",
+ " Distrito Federal | \n",
+ " Ensino Médio Regular | \n",
+ " 43.6 | \n",
+ " 2021 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1215 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id_uf nome etapa_ensino tdi ano\n",
+ "0 11 Rondônia Ensino Fundamental – Anos Finais 65.8 2007\n",
+ "1 12 Acre Ensino Fundamental – Anos Finais 60.0 2007\n",
+ "2 13 Amazonas Ensino Fundamental – Anos Finais 75.7 2007\n",
+ "3 14 Roraima Ensino Fundamental – Anos Finais 69.5 2007\n",
+ "4 15 Pará Ensino Fundamental – Anos Finais 68.4 2007\n",
+ ".. ... ... ... ... ...\n",
+ "76 43 Rio Grande do Sul Ensino Médio Regular 53.2 2021\n",
+ "77 50 Mato Grosso do Sul Ensino Médio Regular 57.9 2021\n",
+ "78 51 Mato Grosso Ensino Médio Regular 40.7 2021\n",
+ "79 52 Goiás Ensino Médio Regular 46.5 2021\n",
+ "80 53 Distrito Federal Ensino Médio Regular 43.6 2021\n",
+ "\n",
+ "[1215 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31,\n",
+ " 32, 33, 35, 41, 42, 43, 50, 51, 52, 53], dtype=object)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe['id_uf'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NOTE(review): this absolute path looks specific to one machine - confirm where\n",
+ "# the UF directory CSV should live before running elsewhere.\n",
+ "bd_dir = pd.read_csv(\n",
+ "    '/home/vilelaluiza/dados/br_inep_sinopse_estatistica_educacao_basica/input/br_bd_diretorios_brasil.uf.csv'\n",
+ ")\n",
+ "\n",
+ "# Lookup from the `nome` column of the directory to its `sigla` column.\n",
+ "uf_name_to_sigla = dict(zip(bd_dir[\"nome\"], bd_dir[\"sigla\"]))\n",
+ "\n",
+ "# Swap each name for its sigla; values missing from the lookup are left as-is.\n",
+ "siglas = melted_dataframe[\"nome\"].astype(str).replace(uf_name_to_sigla)\n",
+ "melted_dataframe[\"nome\"] = siglas\n",
+ "\n",
+ "# The column now holds abbreviations, so label it accordingly.\n",
+ "melted_dataframe = melted_dataframe.rename(\n",
+ "    columns={\"nome\": \"sigla_uf\"}, errors=\"raise\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only the columns that are written to the CSV, in this order.\n",
+ "output_columns = [\"ano\", \"sigla_uf\", \"etapa_ensino\", \"tdi\"]\n",
+ "melted_dataframe = melted_dataframe.loc[:, output_columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " sigla_uf | \n",
+ " etapa_ensino | \n",
+ " tdi | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " RO | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 65.8 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " AC | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 60.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " AM | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 75.7 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2007 | \n",
+ " RR | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 69.5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2007 | \n",
+ " PA | \n",
+ " Ensino Fundamental – Anos Finais | \n",
+ " 68.4 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " 2021 | \n",
+ " RS | \n",
+ " Ensino Médio Regular | \n",
+ " 53.2 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " 2021 | \n",
+ " MS | \n",
+ " Ensino Médio Regular | \n",
+ " 57.9 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 2021 | \n",
+ " MT | \n",
+ " Ensino Médio Regular | \n",
+ " 40.7 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 2021 | \n",
+ " GO | \n",
+ " Ensino Médio Regular | \n",
+ " 46.5 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 2021 | \n",
+ " DF | \n",
+ " Ensino Médio Regular | \n",
+ " 43.6 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1215 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano sigla_uf etapa_ensino tdi\n",
+ "0 2007 RO Ensino Fundamental – Anos Finais 65.8\n",
+ "1 2007 AC Ensino Fundamental – Anos Finais 60.0\n",
+ "2 2007 AM Ensino Fundamental – Anos Finais 75.7\n",
+ "3 2007 RR Ensino Fundamental – Anos Finais 69.5\n",
+ "4 2007 PA Ensino Fundamental – Anos Finais 68.4\n",
+ ".. ... ... ... ...\n",
+ "76 2021 RS Ensino Médio Regular 53.2\n",
+ "77 2021 MS Ensino Médio Regular 57.9\n",
+ "78 2021 MT Ensino Médio Regular 40.7\n",
+ "79 2021 GO Ensino Médio Regular 46.5\n",
+ "80 2021 DF Ensino Médio Regular 43.6\n",
+ "\n",
+ "[1215 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Folder for this table inside OUTPUT; created on first run.\n",
+ "path = os.path.join(OUTPUT, \"educacao_especial_distorcao_idade_serie\")\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "\n",
+ "# Every value is cast to text before the table is written without its index.\n",
+ "csv_file = os.path.join(path, \"data.csv\")\n",
+ "melted_dataframe.astype(str).to_csv(csv_file, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb
new file mode 100644
index 00000000..cba31907
--- /dev/null
+++ b/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb
@@ -0,0 +1,815 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import zipfile\n",
+ "import pandas as pd\n",
+ "import basedosdados as bd\n",
+ "\n",
+ "# Input/output folders are resolved against the current working directory.\n",
+ "INPUT = os.path.join(os.getcwd(), \"input\")\n",
+ "OUTPUT = os.path.join(os.getcwd(), \"output\")\n",
+ "\n",
+ "# Create both folders up front; existing ones are left untouched.\n",
+ "for _folder in (INPUT, OUTPUT):\n",
+ "    os.makedirs(_folder, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_sheet(sheet_name: str, skiprows: int = 8) -> pd.DataFrame:\n",
+ "    \"\"\"Load one sheet of `tx_rend_brasil_regioes_ufs_esp.xlsx` from the INPUT folder.\n",
+ "\n",
+ "    Args:\n",
+ "        sheet_name: Sheet to read, passed through to `pd.read_excel`.\n",
+ "        skiprows: Number of leading rows to skip, passed through to `pd.read_excel`.\n",
+ "\n",
+ "    Returns:\n",
+ "        The sheet as a DataFrame.\n",
+ "    \"\"\"\n",
+ "    workbook_path = os.path.join(INPUT, \"tx_rend_brasil_regioes_ufs_esp.xlsx\")\n",
+ "    return pd.read_excel(workbook_path, sheet_name=sheet_name, skiprows=skiprows)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Open the workbook only long enough to list its sheets. The context manager\n",
+ "# closes the file handle, which the bare `pd.ExcelFile(...)` call left open.\n",
+ "workbook_path = os.path.join(INPUT, \"tx_rend_brasil_regioes_ufs_esp.xlsx\")\n",
+ "\n",
+ "with pd.ExcelFile(workbook_path) as excel_data:\n",
+ "    # Get the sheet names\n",
+ "    sheet_names = excel_data.sheet_names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# One DataFrame per sheet, keyed by sheet name (read with read_sheet's defaults).\n",
+ "dfs = dict(zip(sheet_names, map(read_sheet, sheet_names)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'BRASIL_REGIOES_UFS ': NU_ANO_CENSO UNIDGEO \\\n",
+ " 0 2007 Brasil \n",
+ " 1 2007 Norte \n",
+ " 2 2007 Nordeste \n",
+ " 3 2007 Sudeste \n",
+ " 4 2007 Sul \n",
+ " .. ... ... \n",
+ " 492 2021 Mato Grosso \n",
+ " 493 2021 Goiás \n",
+ " 494 2021 Distrito Federal \n",
+ " 495 NaN NaN \n",
+ " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n",
+ " \n",
+ " NO_CATEGORIA NO_DEPENDENCIA 1_CAT_FUN 1_CAT_FUN_AI 1_CAT_FUN_AF \\\n",
+ " 0 Total Total 73.1 71.6 78.7 \n",
+ " 1 Total Total 63.2 61.4 75.5 \n",
+ " 2 Total Total 67.0 65.7 73.0 \n",
+ " 3 Total Total 77.0 75.6 81.5 \n",
+ " 4 Total Total 76.8 76.4 78.1 \n",
+ " .. ... ... ... ... ... \n",
+ " 492 Total Total 96.3 96.2 96.4 \n",
+ " 493 Total Total 96.5 95.2 97.6 \n",
+ " 494 Total Total 89.1 83.1 95.7 \n",
+ " 495 NaN NaN NaN NaN NaN \n",
+ " 496 NaN NaN NaN NaN NaN \n",
+ " \n",
+ " 1_CAT_MED 2_CAT_FUN 2_CAT_FUN_AI 2_CAT_FUN_AF 2_CAT_MED 3_CAT_FUN \\\n",
+ " 0 79.6 22.1 23.8 16.2 13.0 4.8 \n",
+ " 1 77.5 28.6 30.4 16.6 11.1 8.2 \n",
+ " 2 77.0 24.7 26.2 17.9 11.3 8.3 \n",
+ " 3 79.6 19.9 21.4 14.7 14.9 3.1 \n",
+ " 4 81.4 20.4 21.0 18.3 11.5 2.8 \n",
+ " .. ... ... ... ... ... ... \n",
+ " 492 81.9 3.1 3.4 2.7 13.4 0.6 \n",
+ " 493 97.8 2.8 4.2 1.6 1.3 0.7 \n",
+ " 494 92.2 10.6 16.4 4.1 7.3 0.3 \n",
+ " 495 NaN NaN NaN NaN NaN NaN \n",
+ " 496 NaN NaN NaN NaN NaN NaN \n",
+ " \n",
+ " 3_CAT_FUN_AI 3_CAT_FUN_AF 3_CAT_MED \n",
+ " 0 4.6 5.1 7.4 \n",
+ " 1 8.2 7.9 11.4 \n",
+ " 2 8.1 9.1 11.7 \n",
+ " 3 3.0 3.8 5.5 \n",
+ " 4 2.6 3.6 7.1 \n",
+ " .. ... ... ... \n",
+ " 492 0.4 0.9 4.7 \n",
+ " 493 0.6 0.8 0.9 \n",
+ " 494 0.5 0.2 0.5 \n",
+ " 495 NaN NaN NaN \n",
+ " 496 NaN NaN NaN \n",
+ " \n",
+ " [497 rows x 16 columns]}"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: BRASIL_REGIOES_UFS \n",
+ "Index(['NU_ANO_CENSO', 'UNIDGEO', 'NO_CATEGORIA', 'NO_DEPENDENCIA',\n",
+ " '1_CAT_FUN', '1_CAT_FUN_AI', '1_CAT_FUN_AF', '1_CAT_MED', '2_CAT_FUN',\n",
+ " '2_CAT_FUN_AI', '2_CAT_FUN_AF', '2_CAT_MED', '3_CAT_FUN',\n",
+ " '3_CAT_FUN_AI', '3_CAT_FUN_AF', '3_CAT_MED'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print each sheet's name, then its column index, then a blank separator line.\n",
+ "for sheet_name, df in dfs.items():\n",
+ "    print(f\"Sheet: {sheet_name}\", df.columns, \"\", sep=\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Source column -> output column. The numeric prefix selects the rate\n",
+ "# (1 = aprovacao, 2 = reprovacao, 3 = abandono) and the suffix the stage.\n",
+ "RENAME_COLUMNS = {\n",
+ "    # identifiers\n",
+ "    \"NU_ANO_CENSO\": \"ano\",\n",
+ "    \"UNIDGEO\": \"nome_uf\",\n",
+ "    # prefix 1_\n",
+ "    \"1_CAT_FUN_AI\": \"taxaaprovacao_anosiniciais\",\n",
+ "    \"1_CAT_FUN_AF\": \"taxaaprovacao_anosfinais\",\n",
+ "    \"1_CAT_MED\": \"taxaaprovacao_ensinomedio\",\n",
+ "    # prefix 2_\n",
+ "    \"2_CAT_FUN_AI\": \"taxareprovacao_anosiniciais\",\n",
+ "    \"2_CAT_FUN_AF\": \"taxareprovacao_anosfinais\",\n",
+ "    \"2_CAT_MED\": \"taxareprovacao_ensinomedio\",\n",
+ "    # prefix 3_\n",
+ "    \"3_CAT_FUN_AI\": \"taxaabandono_anosiniciais\",\n",
+ "    \"3_CAT_FUN_AF\": \"taxaabandono_anosfinais\",\n",
+ "    \"3_CAT_MED\": \"taxaabandono_ensinomedio\",\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n",
+ "    \"\"\"Return `df` without the columns whose names start with NO_, 1_, 2_ or 3_.\n",
+ "\n",
+ "    Called after renaming, so the columns still carrying one of these prefixes\n",
+ "    are the ones RENAME_COLUMNS did not map.\n",
+ "    \"\"\"\n",
+ "    unwanted_prefixes = (\"NO_\", \"1_\", \"2_\", \"3_\")\n",
+ "    leftovers = [label for label in df.columns if label.startswith(unwanted_prefixes)]\n",
+ "    return df.drop(columns=leftovers)\n",
+ "\n",
+ "\n",
+ "# Rename first (raising if an expected source column is missing), then drop\n",
+ "# whatever was left unmapped.\n",
+ "dfs = {\n",
+ "    name: sheet.rename(columns=RENAME_COLUMNS, errors=\"raise\").pipe(drop_unused_columns)\n",
+ "    for name, sheet in dfs.items()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'BRASIL_REGIOES_UFS ': ano nome_uf \\\n",
+ " 0 2007 Brasil \n",
+ " 1 2007 Norte \n",
+ " 2 2007 Nordeste \n",
+ " 3 2007 Sudeste \n",
+ " 4 2007 Sul \n",
+ " .. ... ... \n",
+ " 492 2021 Mato Grosso \n",
+ " 493 2021 Goiás \n",
+ " 494 2021 Distrito Federal \n",
+ " 495 NaN NaN \n",
+ " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n",
+ " \n",
+ " taxaaprovacao_anosiniciais taxaaprovacao_anosfinais \\\n",
+ " 0 71.6 78.7 \n",
+ " 1 61.4 75.5 \n",
+ " 2 65.7 73.0 \n",
+ " 3 75.6 81.5 \n",
+ " 4 76.4 78.1 \n",
+ " .. ... ... \n",
+ " 492 96.2 96.4 \n",
+ " 493 95.2 97.6 \n",
+ " 494 83.1 95.7 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaaprovacao_ensinomedio taxareprovacao_anosiniciais \\\n",
+ " 0 79.6 23.8 \n",
+ " 1 77.5 30.4 \n",
+ " 2 77.0 26.2 \n",
+ " 3 79.6 21.4 \n",
+ " 4 81.4 21.0 \n",
+ " .. ... ... \n",
+ " 492 81.9 3.4 \n",
+ " 493 97.8 4.2 \n",
+ " 494 92.2 16.4 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxareprovacao_anosfinais taxareprovacao_ensinomedio \\\n",
+ " 0 16.2 13.0 \n",
+ " 1 16.6 11.1 \n",
+ " 2 17.9 11.3 \n",
+ " 3 14.7 14.9 \n",
+ " 4 18.3 11.5 \n",
+ " .. ... ... \n",
+ " 492 2.7 13.4 \n",
+ " 493 1.6 1.3 \n",
+ " 494 4.1 7.3 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaabandono_anosiniciais taxaabandono_anosfinais \\\n",
+ " 0 4.6 5.1 \n",
+ " 1 8.2 7.9 \n",
+ " 2 8.1 9.1 \n",
+ " 3 3.0 3.8 \n",
+ " 4 2.6 3.6 \n",
+ " .. ... ... \n",
+ " 492 0.4 0.9 \n",
+ " 493 0.6 0.8 \n",
+ " 494 0.5 0.2 \n",
+ " 495 NaN NaN \n",
+ " 496 NaN NaN \n",
+ " \n",
+ " taxaabandono_ensinomedio \n",
+ " 0 7.4 \n",
+ " 1 11.4 \n",
+ " 2 11.7 \n",
+ " 3 5.5 \n",
+ " 4 7.1 \n",
+ " .. ... \n",
+ " 492 4.7 \n",
+ " 493 0.9 \n",
+ " 494 0.5 \n",
+ " 495 NaN \n",
+ " 496 NaN \n",
+ " \n",
+ " [497 rows x 11 columns]}"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dfs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sheet: BRASIL_REGIOES_UFS \n",
+ "Index(['ano', 'nome_uf', 'taxaaprovacao_anosiniciais',\n",
+ " 'taxaaprovacao_anosfinais', 'taxaaprovacao_ensinomedio',\n",
+ " 'taxareprovacao_anosiniciais', 'taxareprovacao_anosfinais',\n",
+ " 'taxareprovacao_ensinomedio', 'taxaabandono_anosiniciais',\n",
+ " 'taxaabandono_anosfinais', 'taxaabandono_ensinomedio'],\n",
+ " dtype='object')\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print each sheet's name, then its column index, then a blank separator line.\n",
+ "for sheet_name, df in dfs.items():\n",
+ "    print(f\"Sheet: {sheet_name}\", df.columns, \"\", sep=\"\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _to_long_format(sheet: pd.DataFrame) -> pd.DataFrame:\n",
+ "    \"\"\"Drop rows without a usable `nome_uf` and unpivot the remaining columns.\n",
+ "\n",
+ "    Rows where `nome_uf` is missing or equal to a single space are discarded.\n",
+ "    Every column other than `ano` and `nome_uf` becomes a (`metrica`, `valor`) pair.\n",
+ "    \"\"\"\n",
+ "    has_name = sheet[\"nome_uf\"].notna() & (sheet[\"nome_uf\"] != \" \")\n",
+ "    named_rows = sheet.loc[has_name]\n",
+ "    return pd.melt(\n",
+ "        named_rows,\n",
+ "        id_vars=[\"ano\", \"nome_uf\"],\n",
+ "        value_vars=named_rows.columns.difference([\"ano\", \"nome_uf\"]).tolist(),\n",
+ "        var_name=\"metrica\",\n",
+ "        value_name=\"valor\",\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "# Iterate over `dfs` explicitly. The previous version read a `df` variable that\n",
+ "# only existed because an earlier cell's for-loop leaked it, so the result\n",
+ "# depended on cell execution order and only ever covered one sheet.\n",
+ "melted_dataframe = pd.concat([_to_long_format(sheet) for sheet in dfs.values()])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ano | \n",
+ " nome_uf | \n",
+ " metrica | \n",
+ " valor | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Brasil | \n",
+ " taxaabandono_anosfinais | \n",
+ " 5.1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " Norte | \n",
+ " taxaabandono_anosfinais | \n",
+ " 7.9 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " Nordeste | \n",
+ " taxaabandono_anosfinais | \n",
+ " 9.1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2007 | \n",
+ " Sudeste | \n",
+ " taxaabandono_anosfinais | \n",
+ " 3.8 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2007 | \n",
+ " Sul | \n",
+ " taxaabandono_anosfinais | \n",
+ " 3.6 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 4450 | \n",
+ " 2021 | \n",
+ " Rio Grande do Sul | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 1.5 | \n",
+ "
\n",
+ " \n",
+ " 4451 | \n",
+ " 2021 | \n",
+ " Mato Grosso do Sul | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 8.4 | \n",
+ "
\n",
+ " \n",
+ " 4452 | \n",
+ " 2021 | \n",
+ " Mato Grosso | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 13.4 | \n",
+ "
\n",
+ " \n",
+ " 4453 | \n",
+ " 2021 | \n",
+ " Goiás | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 1.3 | \n",
+ "
\n",
+ " \n",
+ " 4454 | \n",
+ " 2021 | \n",
+ " Distrito Federal | \n",
+ " taxareprovacao_ensinomedio | \n",
+ " 7.3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4455 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ano nome_uf metrica valor\n",
+ "0 2007 Brasil taxaabandono_anosfinais 5.1\n",
+ "1 2007 Norte taxaabandono_anosfinais 7.9\n",
+ "2 2007 Nordeste taxaabandono_anosfinais 9.1\n",
+ "3 2007 Sudeste taxaabandono_anosfinais 3.8\n",
+ "4 2007 Sul taxaabandono_anosfinais 3.6\n",
+ "... ... ... ... ...\n",
+ "4450 2021 Rio Grande do Sul taxareprovacao_ensinomedio 1.5\n",
+ "4451 2021 Mato Grosso do Sul taxareprovacao_ensinomedio 8.4\n",
+ "4452 2021 Mato Grosso taxareprovacao_ensinomedio 13.4\n",
+ "4453 2021 Goiás taxareprovacao_ensinomedio 1.3\n",
+ "4454 2021 Distrito Federal taxareprovacao_ensinomedio 7.3\n",
+ "\n",
+ "[4455 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "melted_dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `metrica` labels have the form '<rate>_<stage>', e.g. 'taxaabandono_anosfinais'.\n",
+ "metric_parts = melted_dataframe[\"metrica\"].str.split(\"_\")\n",
+ "# Last piece: 'anosiniciais', 'anosfinais', or 'ensinomedio'\n",
+ "melted_dataframe[\"etapa_ensino\"] = metric_parts.str[-1]\n",
+ "# First piece: 'taxaaprovacao', 'taxareprovacao', 'taxaabandono'\n",
+ "melted_dataframe[\"tipo_metrica\"] = metric_parts.str[0]\n",
+ "\n",
+ "# One row per (ano, nome_uf, etapa_ensino) and one column per rate type.\n",
+ "# No aggfunc is given, so pivot_table applies its default aggregation.\n",
+ "df_final = melted_dataframe.pivot_table(\n",
+ "    index=[\"ano\", \"nome_uf\", \"etapa_ensino\"],\n",
+ "    columns=\"tipo_metrica\",\n",
+ "    values=\"valor\",\n",
+ ").reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Pivoted rate columns -> snake_case output names.\n",
+ "RENAME_COLUMNS_MELTED = dict(\n",
+ "    taxaabandono=\"taxa_abandono\",\n",
+ "    taxaaprovacao=\"taxa_aprovacao\",\n",
+ "    taxareprovacao=\"taxa_reprovacao\",\n",
+ ")\n",
+ "\n",
+ "# Stage code taken from the metric name -> human-readable stage label.\n",
+ "etapa_ensino = dict(\n",
+ "    anosiniciais=\"Ensino Fundamental – Anos Iniciais\",\n",
+ "    anosfinais=\"Ensino Fundamental – Anos Finais\",\n",
+ "    ensinomedio=\"Ensino Médio Regular\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " tipo_metrica | \n",
+ " ano | \n",
+ " nome_uf | \n",
+ " etapa_ensino | \n",
+ " taxaabandono | \n",
+ " taxaaprovacao | \n",
+ " taxareprovacao | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " Acre | \n",
+ " anosfinais | \n",
+ " 0.0 | \n",
+ " 91.9 | \n",
+ " 8.1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2007 | \n",
+ " Acre | \n",
+ " anosiniciais | \n",
+ " 4.8 | \n",
+ " 70.5 | \n",
+ " 24.7 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2007 | \n",
+ " Acre | \n",
+ " ensinomedio | \n",
+ " 6.1 | \n",
+ " 84.8 | \n",
+ " 9.1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2007 | \n",
+ " Alagoas | \n",
+ " anosfinais | \n",
+ " 16.3 | \n",
+ " 64.8 | \n",
+ " 18.9 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2007 | \n",
+ " Alagoas | \n",
+ " anosiniciais | \n",
+ " 9.0 | \n",
+ " 62.2 | \n",
+ " 28.8 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1480 | \n",
+ " 2021 | \n",
+ " São Paulo | \n",
+ " anosiniciais | \n",
+ " 0.5 | \n",
+ " 69.8 | \n",
+ " 29.7 | \n",
+ "
\n",
+ " \n",
+ " 1481 | \n",
+ " 2021 | \n",
+ " São Paulo | \n",
+ " ensinomedio | \n",
+ " 3.6 | \n",
+ " 94.3 | \n",
+ " 2.1 | \n",
+ "
\n",
+ " \n",
+ " 1482 | \n",
+ " 2021 | \n",
+ " Tocantins | \n",
+ " anosfinais | \n",
+ " 1.2 | \n",
+ " 96.2 | \n",
+ " 2.6 | \n",
+ "
\n",
+ " \n",
+ " 1483 | \n",
+ " 2021 | \n",
+ " Tocantins | \n",
+ " anosiniciais | \n",
+ " 0.4 | \n",
+ " 88.4 | \n",
+ " 11.2 | \n",
+ "
\n",
+ " \n",
+ " 1484 | \n",
+ " 2021 | \n",
+ " Tocantins | \n",
+ " ensinomedio | \n",
+ " 2.5 | \n",
+ " 95.2 | \n",
+ " 2.3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1485 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "tipo_metrica ano nome_uf etapa_ensino taxaabandono taxaaprovacao \\\n",
+ "0 2007 Acre anosfinais 0.0 91.9 \n",
+ "1 2007 Acre anosiniciais 4.8 70.5 \n",
+ "2 2007 Acre ensinomedio 6.1 84.8 \n",
+ "3 2007 Alagoas anosfinais 16.3 64.8 \n",
+ "4 2007 Alagoas anosiniciais 9.0 62.2 \n",
+ "... ... ... ... ... ... \n",
+ "1480 2021 São Paulo anosiniciais 0.5 69.8 \n",
+ "1481 2021 São Paulo ensinomedio 3.6 94.3 \n",
+ "1482 2021 Tocantins anosfinais 1.2 96.2 \n",
+ "1483 2021 Tocantins anosiniciais 0.4 88.4 \n",
+ "1484 2021 Tocantins ensinomedio 2.5 95.2 \n",
+ "\n",
+ "tipo_metrica taxareprovacao \n",
+ "0 8.1 \n",
+ "1 24.7 \n",
+ "2 9.1 \n",
+ "3 18.9 \n",
+ "4 28.8 \n",
+ "... ... \n",
+ "1480 29.7 \n",
+ "1481 2.1 \n",
+ "1482 2.6 \n",
+ "1483 11.2 \n",
+ "1484 2.3 \n",
+ "\n",
+ "[1485 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_final"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Give the rate columns their final names and swap stage codes for full labels;\n",
+ "# inside the lambda, `etapa_ensino` is the module-level lookup dict.\n",
+ "df_final = df_final.rename(columns=RENAME_COLUMNS_MELTED).assign(\n",
+ "    etapa_ensino=lambda table: table[\"etapa_ensino\"].replace(etapa_ensino)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Prefer a copy of the UF directory inside INPUT so the notebook is not tied to\n",
+ "# one machine; fall back to the original absolute location when it is absent.\n",
+ "uf_directory_csv = os.path.join(INPUT, \"br_bd_diretorios_brasil.uf.csv\")\n",
+ "if not os.path.exists(uf_directory_csv):\n",
+ "    uf_directory_csv = '/home/vilelaluiza/dados/br_inep_sinopse_estatistica_educacao_basica/input/br_bd_diretorios_brasil.uf.csv'\n",
+ "\n",
+ "bd_dir = pd.read_csv(uf_directory_csv)\n",
+ "\n",
+ "# Inner merge on 'nome_uf' == 'nome': rows of df_final whose name has no match\n",
+ "# in the directory are dropped from the result.\n",
+ "df_final = pd.merge(df_final, bd_dir[['nome', 'sigla']], how='inner', left_on='nome_uf', right_on='nome')\n",
+ "\n",
+ "# Rename the 'sigla' column to 'sigla_uf' and drop both name columns\n",
+ "df_final = df_final.rename(columns={'sigla': 'sigla_uf'}).drop(columns=['nome_uf', 'nome'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['AC', 'AL', 'AP', 'AM', 'BA', 'CE', 'DF', 'ES', 'GO', 'MA', 'MT',\n",
+ " 'MS', 'MG', 'PR', 'PB', 'PA', 'PE', 'PI', 'RN', 'RS', 'RJ', 'RO',\n",
+ " 'RR', 'SC', 'SE', 'SP', 'TO'], dtype=object)"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_final['sigla_uf'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only the columns that are written to the CSV, in this order.\n",
+ "output_columns = [\n",
+ "    \"ano\",\n",
+ "    \"sigla_uf\",\n",
+ "    \"etapa_ensino\",\n",
+ "    \"taxa_aprovacao\",\n",
+ "    \"taxa_reprovacao\",\n",
+ "    \"taxa_abandono\",\n",
+ "]\n",
+ "df_final = df_final.loc[:, output_columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Folder for this table inside OUTPUT; created on first run.\n",
+ "path = os.path.join(OUTPUT, \"educacao_especial_taxa_rendimento\")\n",
+ "os.makedirs(path, exist_ok=True)\n",
+ "\n",
+ "# Every value is cast to text before the table is written without its index.\n",
+ "csv_file = os.path.join(path, \"data.csv\")\n",
+ "df_final.astype(str).to_csv(csv_file, index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.17"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_inep_educacao_especial/schema.yml b/models/br_inep_educacao_especial/schema.yml
new file mode 100644
index 00000000..cb02eedd
--- /dev/null
+++ b/models/br_inep_educacao_especial/schema.yml
@@ -0,0 +1,391 @@
+---
+version: 2
+models:
+ - name: br_inep_educacao_especial__etapa_ensino
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe (Comuns ou Exclusivas) e Etapa de Ensino
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__faixa_etaria
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe (Comuns ou Exclusivas) e faixa etária
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+ - name: faixa_etaria
+ description: Faixa etária
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__localizacao
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe, rede e localização
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+ - name: rede
+ description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
+ - name: localizacao
+ description: Localização (e.g. Zona Urbana, Zona Rural)
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__sexo_raca_cor
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe, sexo e raça/cor
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+ - name: sexo
+ description: Sexo
+ - name: raca_cor
+ description: Raça/Cor
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__tempo_ensino
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe, rede e tempo de ensino (Integral ou Parcial)
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+ - name: rede
+ description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
+ - name: tempo_ensino
+ description: Classificação em tempo integral e tempo parcial
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__tipo_deficiencia
+ description: Número de alunos matriculados na Educação Especial ao longo do tempo
+ por tipo de classe e tipo de deficiência, transtorno global do desenvolvimento
+ ou altas habilidades/superdotação
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: tipo_classe
+ description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
+ Exclusivas/Especiais)
+      - name: tipo_deficiencia
+ description: Tipo de deficiência, transtorno global do desenvolvimento ou
+ altas habilidades/superdotação
+ - name: quantidade_matricula
+ description: Número de matrículas
+ - name: br_inep_educacao_especial__uf_distorcao_idade_serie
+ description: 'Taxa de distorção idade-série da Educação Especial por UF. Dados
+ obtidos via Pedido LAI (nº protocolo: 23546066150202238)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: tdi
+ description: Taxa de distorção idade-série
+ - name: br_inep_educacao_especial__brasil_distorcao_idade_serie
+ description: 'Taxa de distorção idade-série da Educação Especial no Brasil. Dados
+ obtidos via Pedido LAI (nº protocolo: 23546066150202238)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: tdi
+ description: Taxa de distorção idade-série
+ - name: br_inep_educacao_especial__uf_taxa_rendimento
+ description: 'Taxas de rendimento (aprovação, reprovação e abandono) da Educação
+ Especial por UF. Dados obtidos via Pedido LAI (nº protocolo: 23546054413202266)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: taxa_aprovacao
+ description: Taxa de aprovação
+ - name: taxa_reprovacao
+ description: Taxa de reprovação
+ - name: taxa_abandono
+ description: Taxa de abandono
+ - name: br_inep_educacao_especial__brasil_taxa_rendimento
+ description: 'Taxas de rendimento (aprovação, reprovação e abandono) da Educação
+ Especial no Brasil. Dados obtidos via Pedido LAI (nº protocolo: 23546054413202266)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: taxa_aprovacao
+ description: Taxa de aprovação
+ - name: taxa_reprovacao
+ description: Taxa de reprovação
+ - name: taxa_abandono
+ description: Taxa de abandono
+ - name: br_inep_educacao_especial__docente_aee
+ description: 'Quantidade de docentes regentes e do Atendimento Educacional Especializado
+ (AEE) no total e com Formação Continuada em Educação Especial. Dados obtidos
+ via Pedido LAI (nº protocolo: 23546035869202316)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: quantidade_docente_regente
+ description: Número de professores regentes
+ - name: quantidade_docente_aee
+ description: Número de professores do Atendimento Educacional Especializado
+ (AEE)
+ - name: quantidade_docente_regente_formacao_continuada
+ description: Número de professores regentes com formação continuada sobre
+ Educação Especial
+ - name: quantidade_docente_aee_formacao_continuada
+ description: Número de professores do Atendimento Educacional Especializado
+ (AEE) com formação continuada sobre Educação Especial
+ - name: br_inep_educacao_especial__docente_formacao
+    description: 'Quantidade de docentes da Educação Básica com Formação Continuada em
+ Educação Especial por rede de ensino. Dados obtidos via Pedido LAI (nº protocolo:
+ 23546049990202406)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: rede
+ description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
+ - name: quantidade_docente_formacao_continuada
+ description: Quantidade de docentes da Educação Básica com formação continuada
+ em Educação Especial
+ - name: br_inep_educacao_especial__matricula_aee
+ description: 'Quantidade de matrículas no Atendimento Educacional Especializado
+ (AEE) por unidade da federação e rede de ensino. Dados obtidos via Pedido LAI
+ (nº protocolo: 23546086048202330)'
+ tests:
+ - not_null_proportion_multiple_columns:
+ at_least: 0.05
+ columns:
+ - name: ano
+ description: Ano
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__ano')
+ field: ano.ano
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: rede
+ description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
+ - name: quantidade_matricula
+ description: Número de matrículas na Educação Especial
+ - name: quantidade_matricula_aee
+ description: Número de matrículas no Atendimento Educacional Especializado
+ (AEE)
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql
new file mode 100644
index 00000000..384fa0e2
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql
@@ -0,0 +1,23 @@
+{{
+    config(
+        alias="docente_deficiencia",
+        schema="br_inep_sinopse_estatistica_educacao_basica",
+        materialized="table",
+        partition_by={
+            "field": "ano",
+            "data_type": "int64",
+            "range": {"start": 2012, "end": 2023, "interval": 1},
+        },
+        cluster_by="sigla_uf",
+    )
+}}  -- NOTE(review): in BigQuery integer-range partitioning the "end" bound is exclusive, so ano = 2023 would not get its own partition - confirm the bound.
+select  -- casts every column of the staging table docente_deficiencia to its final type
+    safe_cast(ano as int64) ano,  -- partition column (see config above)
+    safe_cast(sigla_uf as string) sigla_uf,  -- clustering column (see config above)
+    safe_cast(id_municipio as string) id_municipio,
+    safe_cast(tipo_classe as string) tipo_classe,
+    safe_cast(deficiencia as string) deficiencia,
+    safe_cast(quantidade_docente as int64) quantidade_docente,  -- safe_cast returns NULL instead of failing when the value cannot be converted
+from
+    `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_deficiencia`
+    as t  -- alias is not referenced by the select list
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql
new file mode 100644
index 00000000..77a3b180
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql
@@ -0,0 +1,23 @@
+{{
+    config(
+        alias="docente_escolaridade",
+        schema="br_inep_sinopse_estatistica_educacao_basica",
+        materialized="table",
+        partition_by={
+            "field": "ano",
+            "data_type": "int64",
+            "range": {"start": 2007, "end": 2023, "interval": 1},
+        },
+        cluster_by="sigla_uf",
+    )
+}}  -- NOTE(review): in BigQuery integer-range partitioning the "end" bound is exclusive, so ano = 2023 would not get its own partition - confirm the bound.
+select  -- casts every column of the staging table docente_escolaridade to its final type
+    safe_cast(ano as int64) ano,  -- partition column (see config above)
+    safe_cast(sigla_uf as string) sigla_uf,  -- clustering column (see config above)
+    safe_cast(id_municipio as string) id_municipio,
+    safe_cast(tipo_classe as string) tipo_classe,
+    safe_cast(escolaridade as string) escolaridade,
+    safe_cast(replace(quantidade_docente, ".0", "") as int64) quantidade_docente,  -- removes every ".0" before the integer cast; presumably staging holds float-formatted text such as "12.0" - verify
+from
+    `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_escolaridade`
+    as t  -- alias is not referenced by the select list
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql
new file mode 100644
index 00000000..73424c9e
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql
@@ -0,0 +1,24 @@
+{{
+    config(
+        alias="docente_etapa_ensino",
+        schema="br_inep_sinopse_estatistica_educacao_basica",
+        materialized="table",
+        partition_by={
+            "field": "ano",
+            "data_type": "int64",
+            "range": {"start": 2007, "end": 2023, "interval": 1},
+        },
+        cluster_by="sigla_uf",
+    )
+}}  -- NOTE(review): in BigQuery integer-range partitioning the "end" bound is exclusive, so ano = 2023 would not get its own partition - confirm the bound.
+
+select  -- casts every column of the staging table docente_etapa_ensino to its final type
+    safe_cast(ano as int64) ano,  -- partition column (see config above)
+    safe_cast(sigla_uf as string) sigla_uf,  -- clustering column (see config above)
+    safe_cast(replace(id_municipio, ".0", "") as string) id_municipio,  -- removes every ".0"; presumably the id was staged as float-formatted text - verify
+    safe_cast(tipo_classe as string) tipo_classe,
+    safe_cast(etapa_ensino as string) etapa_ensino,
+    safe_cast(replace(quantidade_docentes, ".0", "") as int64) quantidade_docente,  -- staging column is plural (quantidade_docentes); the output column is singular
+from
+    `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_etapa_ensino`
+    as t  -- alias is not referenced by the select list
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql
similarity index 77%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql
rename to models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql
index a101d605..bd7e3636 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql
@@ -1,6 +1,6 @@
{{
config(
- alias="educacao_especial_faixa_etaria",
+ alias="docente_faixa_etaria_sexo",
schema="br_inep_sinopse_estatistica_educacao_basica",
materialized="table",
partition_by={
@@ -17,7 +17,8 @@ select
safe_cast(id_municipio as string) id_municipio,
safe_cast(tipo_classe as string) tipo_classe,
safe_cast(faixa_etaria as string) faixa_etaria,
- safe_cast(quantidade_matricula as numeric) quantidade_matricula,
+ safe_cast(sexo as string) sexo,
+ safe_cast(quantidade_docente as int64) quantidade_docente,
from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_faixa_etaria`
+ `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_faixa_etaria_sexo`
as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql
new file mode 100644
index 00000000..a58cb833
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql
@@ -0,0 +1,24 @@
+{{
+    config(
+        alias="docente_localizacao",
+        schema="br_inep_sinopse_estatistica_educacao_basica",
+        materialized="table",
+        partition_by={
+            "field": "ano",
+            "data_type": "int64",
+            "range": {"start": 2007, "end": 2023, "interval": 1},
+        },
+        cluster_by="sigla_uf",
+    )
+}}  -- NOTE(review): in BigQuery integer-range partitioning the "end" bound is exclusive, so ano = 2023 would not get its own partition - confirm the bound.
+select  -- casts every column of the staging table docente_localizacao to its final type
+    safe_cast(ano as int64) ano,  -- partition column (see config above)
+    safe_cast(sigla_uf as string) sigla_uf,  -- clustering column (see config above)
+    safe_cast(replace(id_municipio, ".0", "") as string) id_municipio,  -- removes every ".0"; presumably the id was staged as float-formatted text - verify
+    safe_cast(tipo_classe as string) tipo_classe,
+    safe_cast(rede as string) rede,
+    safe_cast(localizacao as string) localizacao,
+    safe_cast(quantidade_docente as int64) quantidade_docente,  -- safe_cast returns NULL instead of failing when the value cannot be converted
+from
+    `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_localizacao`
+    as t  -- alias is not referenced by the select list
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql
similarity index 74%
rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql
rename to models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql
index b5619396..74c2d810 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql
@@ -1,6 +1,6 @@
{{
config(
- alias="educacao_especial_localizacao",
+ alias="docente_regime_contrato",
schema="br_inep_sinopse_estatistica_educacao_basica",
materialized="table",
partition_by={
@@ -17,8 +17,8 @@ select
safe_cast(id_municipio as string) id_municipio,
safe_cast(tipo_classe as string) tipo_classe,
safe_cast(rede as string) rede,
- safe_cast(localizacao as string) localizacao,
- safe_cast(quantidade_matricula as numeric) quantidade_matricula,
+ safe_cast(regime_contrato as string) regime_contrato,
+ safe_cast(quantidade_docente as int64) quantidade_docente,
from
- `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_localizacao`
+ `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_regime_contrato`
as t
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py
new file mode 100644
index 00000000..4473f626
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py
@@ -0,0 +1,172 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+INPUT = os.path.join(os.getcwd(), "input")
+OUTPUT = os.path.join(os.getcwd(), "output")
+
+# os.makedirs(INPUT, exist_ok=True)
+# os.makedirs(OUTPUT, exist_ok=True)
+
+RENAME_DEFICIENCIA = {  # class-type label -> {worksheet header: output column name}
+    "Educacao Especial - Classes Comuns": {
+        "Unnamed: 1": "uf",  # renamed first, so it survives read_sheet's removal of "Unnamed..." columns
+        "Unnamed: 3": "id_municipio",  # renamed first, so it survives read_sheet's removal of "Unnamed..." columns
+        "Cegueira": "Cegueira",  # identity entries: read_sheet renames with errors="raise", so each listed header must exist in the sheet
+        "Baixa Visão": "Baixa Visão",
+        "Surdez": "Surdez",
+        "Deficiência Auditiva": "Deficiência Auditiva",
+        "Surdocegueira": "Surdocegueira",
+        "Deficiência Física": "Deficiência Física",
+        "Deficiência Intelectual": "Deficiência Intelectual",
+        "Deficiência Múltipla": "Deficiência Múltipla",
+        # "Transtorno do Espectro Autista": "Transtorno do Espectro Autista",
+        # "Altas Habilidades / Superdotação": "Altas Habilidades / Superdotação",
+    },
+    "Educacao Especial - Classes Exclusivas": {  # same mapping as the "Classes Comuns" entry above
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Cegueira": "Cegueira",
+        "Baixa Visão": "Baixa Visão",
+        "Surdez": "Surdez",
+        "Deficiência Auditiva": "Deficiência Auditiva",
+        "Surdocegueira": "Surdocegueira",
+        "Deficiência Física": "Deficiência Física",
+        "Deficiência Intelectual": "Deficiência Intelectual",
+        "Deficiência Múltipla": "Deficiência Múltipla",
+        # "Transtorno do Espectro Autista": "Transtorno do Espectro Autista",
+        # "Altas Habilidades / Superdotação": "Altas Habilidades / Superdotação",
+    },
+}
+
+deficiencia = {  # keyword arguments for read_sheet(), one entry per class type
+    "educacao_especial_classes_comuns": {
+        "dicionario": RENAME_DEFICIENCIA["Educacao Especial - Classes Comuns"],  # header -> column mapping
+        "chave": "2.48",  # worksheet name handed to pd.read_excel(sheet_name=...)
+        "valor": "Educacao Especial - Classes Comuns",  # same text as the RENAME_DEFICIENCIA key; stored in tipo_classe
+        "skiprows": 7,  # handed to pd.read_excel(skiprows=...)
+        "table": "docente_deficiencia",  # folder name created under OUTPUT
+    },
+    "educacao_especial_classes_exclusivas": {
+        "dicionario": RENAME_DEFICIENCIA["Educacao Especial - Classes Exclusivas"],  # header -> column mapping
+        "chave": "2.54",  # worksheet name handed to pd.read_excel(sheet_name=...)
+        "valor": "Educacao Especial - Classes Exclusivas",  # same text as the RENAME_DEFICIENCIA key; stored in tipo_classe
+        "skiprows": 7,  # handed to pd.read_excel(skiprows=...)
+        "table": "docente_deficiencia",  # both class types are written to the same output folder
+    },
+}
+
+
+def read_sheet(
+ table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows
+) -> pd.DataFrame:
+ print("Tratando dados de", valor, ano)
+ path_excel = os.path.join(
+ INPUT,
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+ )
+
+ df = pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=chave,
+ )
+
+ sheets_etapa_ensino_serie = {chave: valor}
+
+ dfs_deficiencia = {
+ name: pd.read_excel(path_excel, skiprows=skiprows, sheet_name=sheet_name)
+ for sheet_name, name in sheets_etapa_ensino_serie.items()
+ }
+
+ dataframes = {}
+
+ for table_name, columns in dfs_deficiencia.items():
+ df = pd.DataFrame(columns) # Create DataFrame for each table
+ dataframes[table_name] = df # Store the DataFrame in a dictionary
+
+ print(df.columns)
+
+ def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:
+ cols_drop = [
+ col
+ for col in df.columns
+ if col.startswith("Unnamed") or col.startswith("Total")
+ ]
+
+ return df.drop(columns=cols_drop)
+
+ dfs_deficiencia = {
+ name: drop_unused_columns(
+ df.rename(columns=RENAME_DEFICIENCIA[name], errors="raise")
+ )
+ for name, df in dfs_deficiencia.items()
+ }
+
+ df_deficiencia = pd.concat(
+ [
+ df.pipe(
+ lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),]
+ )
+ .pipe(
+ lambda d: pd.melt(
+ d,
+ id_vars=["id_municipio", "uf"],
+ value_vars=d.columns.difference(
+ ["id_municipio", "uf"]
+ ).tolist(), # Convert to list
+ var_name="deficiencia",
+ value_name="quantidade_docente",
+ )
+ )
+ .assign(tipo_classe=tipo_classe)
+ for tipo_classe, df in dfs_deficiencia.items()
+ ]
+ )
+
+ bd_dir = bd.read_sql(
+ "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+ billing_project_id="basedosdados",
+ reauth=False,
+ )
+
+ df_deficiencia["uf"] = df_deficiencia["uf"].apply(lambda uf: uf.strip()).replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore
+
+ df_deficiencia = df_deficiencia.rename(columns={"uf": "sigla_uf"}, errors="raise")
+
+ df_deficiencia["quantidade_docente"] = df_deficiencia["quantidade_docente"].astype(
+ int
+ )
+
+ print("Particionando dados")
+ for sigla_uf, df in df_deficiencia.groupby("sigla_uf"):
+ path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+ if not os.path.exists(path):
+ os.makedirs(path, exist_ok=True)
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="w"
+ )
+ else:
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="a", header=False
+ )
+
+if __name__ == '__main__' :  # script entry point: runs read_sheet once per special-education class type
+    lista = [
+        "educacao_especial_classes_comuns",
+        "educacao_especial_classes_exclusivas",
+    ]
+
+    for x in lista:
+        # NOTE(review): the year is hard-coded below; the disabled loop was `for ano in range(2012, 2019):` - confirm the intended year(s).
+        read_sheet(
+            table=deficiencia[x]["table"],
+            ano=2011,  # NOTE(review): below the 2012 partition start declared in the docente_deficiencia dbt model - confirm
+            chave=deficiencia[x]["chave"],
+            valor=deficiencia[x]["valor"],
+            dicionario=deficiencia[x]["dicionario"],
+            skiprows=deficiencia[x]["skiprows"],
+        )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py
new file mode 100644
index 00000000..179dadf4
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py
@@ -0,0 +1,347 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+
+INPUT = os.path.join(os.getcwd(), "input")  # ./input under the current working directory
+OUTPUT = os.path.join(os.getcwd(), "output")  # ./output under the current working directory
+
+# Directory creation is disabled here (kept for reference):
+# os.makedirs(INPUT, exist_ok=True); os.makedirs(OUTPUT, exist_ok=True)
+
+RENAMES_ETAPA_ENSINO_SERIE = {  # per-sheet rename map: raw Excel header -> output column / schooling label
+ "Educacao Basica": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura10": "Graduação - Com Licenciatura",  # NOTE(review): digit suffix differs between sheets (10/9/8); presumably a footnote marker in the header - confirm
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Educacao Infantil - Creche": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura10": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Educacao Infantil - Pré-Escola": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura10": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Ensino Fundamental": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",  # suffix is 9 from this sheet on
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Ensino Fundamental - Anos Iniciais": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Ensino Fundamental - Anos Finais": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Ensino Médio": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Educacao Profissional": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "EJA": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Educacao Especial - Classes Comuns": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura9": "Graduação - Com Licenciatura",
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+ "Educacao Especial - Classes Exclusivas": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Unnamed: 5": "Ensino Fundamental",
+ "Unnamed: 6": "Ensino Médio",
+ "Com Licenciatura8": "Graduação - Com Licenciatura",  # suffix is 8 only for this sheet
+ "Sem Licenciatura": "Graduação - Sem Licenciatura",
+ "Especialização": "Pós Graduação - Especialização",
+ "Mestrado": "Pós Graduação - Mestrado",
+ "Doutorado": "Pós Graduação - Doutorado",
+ },
+}
+
+
+escolaridade = {
+ "educacao_basica": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"],
+ "chave": "2.4",
+ "valor": "Educacao Basica",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "ensino_infantil_creche": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"],
+ # "chave": "2.10",
+ "chave": "2.9", # Para anos anteriores a 2010
+ "valor": "Educacao Infantil - Creche",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "educacao_infantil_pre_escola": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"],
+ # "chave": "2.14",
+ "chave": "2.12", # Para anos anteriores a 2010
+ "valor": "Educacao Infantil - Pré-Escola",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "ensino_fundamental": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"],
+ "chave": "2.19",
+ "chave": "2.16", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "ensino_fundamental_anos_iniciais": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"],
+ # "chave": "2.23",
+ "chave": "2.19", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental - Anos Iniciais",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "ensino_fundamental_anos_finais": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"],
+ # "chave": "2.27",
+ "chave": "2.22", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental - Anos Finais",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "ensino_medio": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"],
+ # "chave": "2.31",
+ "chave": "2.25", # Para anos anteriores a 2010
+ "valor": "Ensino Médio",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "educacao_profissional": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"],
+ # "chave": "2.36",
+ "chave": "2.29", # Para anos anteriores a 2010
+ "valor": "Educacao Profissional",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "EJA": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"],
+ # "chave": "2.41",
+ "chave": "2.33", # Para anos anteriores a 2010
+ "valor": "EJA",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "educacao_especial_classes_comuns": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Comuns"],
+ # "chave": "2.47",
+ "chave": "2.38", # Para anos anteriores a 2010
+ "valor": "Educacao Especial - Classes Comuns",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+ "educacao_especial_classes_exclusivas": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE[
+ "Educacao Especial - Classes Exclusivas"
+ ],
+ # "chave": "2.53",
+ "chave": "2.52", # Para o ano de 2011
+ "chave": "2.42", # Para anos anteriores a 2010
+ "valor": "Educacao Especial - Classes Exclusivas",
+ "skiprows": 9,
+ "table": "docente_escolaridade",
+ },
+}
+
+
+def read_sheet(
+ table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
+) -> pd.DataFrame:
+ print("Tratando dados de", valor, ano)
+ path_excel = os.path.join(
+ INPUT,
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+ )
+ df = pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=chave,
+ )
+
+ sheets_escolaridade = {chave: valor}
+
+ dfs_escolaridade = {
+ name: pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=sheet_name,
+ )
+ for sheet_name, name in sheets_escolaridade.items()
+ }
+
+ dataframes = {}
+ for table_name, columns in dfs_escolaridade.items():
+ df = pd.DataFrame(columns)
+ dataframes[table_name] = df
+
+ print(df.columns)
+
+ def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:
+ cols_drop = [
+ col
+ for col in df.columns
+ if col.startswith("Unnamed") or col.startswith("Total")
+ ]
+
+ return df.drop(columns=cols_drop)
+
+ dfs_escolaridade = {
+ name: drop_unused_columns(df.rename(columns=dicionario, errors="raise"))
+ for name, df in dfs_escolaridade.items()
+ }
+
+ df_escolaridade = pd.concat(
+ [
+ df.pipe(
+ lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),]
+ )
+ .pipe(
+ lambda d: pd.melt(
+ d,
+ id_vars=["id_municipio", "uf"],
+ value_vars=d.columns.difference(
+ ["id_municipio", "uf"]
+ ).tolist(), # Convert to list
+ var_name="escolaridade",
+ value_name="quantidade_docente",
+ )
+ )
+ .assign(tipo_classe=tipo_classe)
+ for tipo_classe, df in dfs_escolaridade.items()
+ ]
+ )
+
+ bd_dir = bd.read_sql(
+ "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+ billing_project_id="basedosdados",
+ reauth=False,
+ )
+
+ df_escolaridade["uf"] = df_escolaridade["uf"].apply(lambda uf: uf.strip()).replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore
+
+ df_escolaridade = df_escolaridade.rename(columns={"uf": "sigla_uf"}, errors="raise")
+
+ print("Particionando dados")
+ for sigla_uf, df in df_escolaridade.groupby("sigla_uf"):
+ path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+ if not os.path.exists(path):
+ os.makedirs(path, exist_ok=True)
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="w"
+ )
+ else:
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="a", header=False
+ )
+
+if __name__ == '__main__' :
+ lista = [
+ "educacao_basica",
+ "ensino_infantil_creche",
+ "educacao_infantil_pre_escola",
+ "ensino_fundamental",
+ "ensino_fundamental_anos_iniciais",
+ "ensino_fundamental_anos_finais",
+ "ensino_medio",
+ "educacao_profissional",
+ "EJA",
+ "educacao_especial_classes_comuns",
+ "educacao_especial_classes_exclusivas",
+ ]
+
+ for x in lista:
+ read_sheet(
+ table=escolaridade[x]["table"],
+ ano=2007,
+ chave=escolaridade[x]["chave"],
+ valor=escolaridade[x]["valor"],
+ dicionario=escolaridade[x]["dicionario"],
+ skiprows=escolaridade[x]["skiprows"],
+ )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py
new file mode 100644
index 00000000..fb16a601
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py
@@ -0,0 +1,444 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+
+INPUT = os.path.join(os.getcwd(), "input")  # ./input under the current working directory
+OUTPUT = os.path.join(os.getcwd(), "output")  # ./output under the current working directory
+
+# Directory creation is disabled here (kept for reference):
+# os.makedirs(INPUT, exist_ok=True); os.makedirs(OUTPUT, exist_ok=True)
+
+RENAMES_ETAPA_ENSINO_SERIE = {  # per-sheet rename map: raw Excel header -> "<sexo>_<faixa etaria>" label (split on "_" in read_sheet)
+ "Educacao Basica": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",  # NOTE(review): ".1" is presumably the suffix pandas adds to a repeated header - confirm
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Educacao Infantil - Creche": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Educacao Infantil - Pré-Escola": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Ensino Fundamental": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Ensino Fundamental - Anos Iniciais": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Ensino Fundamental - Anos Finais": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Ensino Médio": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Educacao Profissional": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "EJA": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Educacao Especial - Classes Comuns": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+ "Educacao Especial - Classes Exclusivas": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Até 24 anos": "Feminino_Até 24 anos",
+ "De 25 a 29 anos": "Feminino_25 a 29 anos",
+ "De 30 a 39 anos": "Feminino_30 a 39 anos",
+ "De 40 a 49 anos": "Feminino_40 a 49 anos",
+ "De 50 a 54 anos": "Feminino_50 a 54 anos",
+ "De 55 a 59 anos": "Feminino_55 a 59 anos",
+ "60 anos ou mais": "Feminino_60 anos ou mais",
+ "Até 24 anos.1": "Masculino_Até 24 anos",
+ "De 25 a 29 anos.1": "Masculino_25 a 29 anos",
+ "De 30 a 39 anos.1": "Masculino_30 a 39 anos",
+ "De 40 a 49 anos.1": "Masculino_40 a 49 anos",
+ "De 50 a 54 anos.1": "Masculino_50 a 54 anos",
+ "De 55 a 59 anos.1": "Masculino_55 a 59 anos",
+ "60 anos ou mais.1": "Masculino_60 anos ou mais",
+ },
+}
+
+
+localizacao = {
+ "educacao_basica": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"],
+ "chave": "2.3",
+ "valor": "Educacao Basica",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "educacao_infantil_creche": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"],
+ #"chave": "2.9",
+ "chave": "2.8", # para anos anteriores a 2010
+ "valor": "Educacao Infantil - Creche",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "educacao_infantil_pre_escola": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"],
+ #"chave": "2.13",
+ "chave": "2.11", # Para anos anteriores a 2010
+ "valor": "Educacao Infantil - Pré-Escola",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "ensino_fundamental": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"],
+ #"chave": "2.18",
+ "chave": "2.15", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "ensino_fundamental_anos_iniciais": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"],
+ #"chave": "2.22",
+ "chave": "2.18", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental - Anos Iniciais",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "ensino_fundamental_anos_finais": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"],
+ # "chave": "2.26",
+ "chave": "2.21", # Para anos anteriores a 2010
+ "valor": "Ensino Fundamental - Anos Finais",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "ensino_medio": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"],
+ #"chave": "2.30",
+ "chave": "2.24", # Para anos anteriores a 2010
+ "valor": "Ensino Médio",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "educacao_profissional": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"],
+ "chave": "2.35",
+ "chave": "2.28", # Para anos anteriores a 2010
+ "valor": "Educacao Profissional",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "EJA": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"],
+ #"chave": "2.40",
+ "chave": "2.32", # Para anos anteriores a 2010
+ "valor": "EJA",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "educacao_especial_classes_comuns": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Comuns"],
+ #"chave": "2.46",
+ "chave": "2.37", # Para anos anteriores a 2010
+ "valor": "Educacao Especial - Classes Comuns",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+ "educacao_especial_classes_exclusivas": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE[
+ "Educacao Especial - Classes Exclusivas"
+ ],
+ #"chave": "2.52",
+ #"chave": "2.51", # Para o ano de 2011
+ "chave": "2.41", # Para o ano anteriores a 2010
+ "valor": "Educacao Especial - Classes Exclusivas",
+ "skiprows": 8,
+ "table": "docente_faixa_etaria_sexo",
+ },
+}
+
+
+def read_sheet(
+ table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
+) -> pd.DataFrame:
+ print("Tratando dados de", valor, ano)
+ path_excel = os.path.join(
+ INPUT,
+ # f"sinopse_estatística_educaç╞o_básica_{ano}",
+ # f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+ )
+ df = pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=chave,
+ )
+
+ sheets_etapa_ensino_serie = {chave: valor}
+
+ dfs_faixa_etaria = {
+ name: pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=sheet_name,
+ )
+ for sheet_name, name in sheets_etapa_ensino_serie.items()
+ }
+
+ dataframes = {}
+ for table_name, columns in dfs_faixa_etaria.items():
+ df = pd.DataFrame(columns)
+ dataframes[table_name] = df
+
+ print(df.columns)
+
+ def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:
+ cols_drop = [
+ col
+ for col in df.columns
+ if col.startswith("Unnamed") or col.startswith("Total")
+ ]
+
+ return df.drop(columns=cols_drop)
+
+ dfs_faixa_etaria = {
+ name: drop_unused_columns(
+ df.rename(columns=RENAMES_ETAPA_ENSINO_SERIE[name], errors="raise")
+ )
+ for name, df in dfs_faixa_etaria.items()
+ }
+
+ df_faixa_etaria = pd.concat(
+ [
+ df.pipe(
+ lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),]
+ )
+ .pipe(
+ lambda d: pd.melt(
+ d,
+ id_vars=["id_municipio", "uf"],
+ value_vars=d.columns.difference(
+ ["id_municipio", "uf"]
+ ).tolist(), # Convert to list
+ var_name="faixa_etaria",
+ value_name="quantidade_docente",
+ )
+ )
+ .assign(tipo_classe=tipo_classe)
+ for tipo_classe, df in dfs_faixa_etaria.items()
+ ]
+ )
+
+ bd_dir = bd.read_sql(
+ "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+ billing_project_id="basedosdados",
+ reauth=False,
+ )
+
+ df_faixa_etaria["uf"] = (
+ df_faixa_etaria["uf"]
+ .apply(lambda uf: uf.strip())
+ .replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore
+ )
+
+ df_faixa_etaria = df_faixa_etaria.rename(columns={"uf": "sigla_uf"}, errors="raise")
+
+ df_faixa_etaria["sexo"] = df_faixa_etaria["faixa_etaria"].apply(
+ lambda v: v.split("_")[-1]
+ )
+
+ df_faixa_etaria["faixa_etaria"] = df_faixa_etaria["faixa_etaria"].apply(
+ lambda v: v.split("_")[0]
+ )
+
+ df_faixa_etaria["quantidade_docente"] = df_faixa_etaria["quantidade_docente"].astype(
+ int
+ )
+
+ print("Particionando dados")
+ for sigla_uf, df in df_faixa_etaria.groupby("sigla_uf"):
+ path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+ if not os.path.exists(path):
+ os.makedirs(path, exist_ok=True)
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="w"
+ )
+ else:
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="a", header=False
+ )
+
+if __name__ == '__main__' :
+ lista = [
+ "educacao_basica",
+ "educacao_infantil_creche",
+ "educacao_infantil_pre_escola",
+ "ensino_fundamental",
+ "ensino_fundamental_anos_iniciais",
+ "ensino_fundamental_anos_finais",
+ "ensino_medio",
+ "educacao_profissional",
+ "EJA",
+ "educacao_especial_classes_comuns",
+ "educacao_especial_classes_exclusivas",
+ ]
+
+ for x in lista:
+ read_sheet(
+ table=localizacao[x]["table"],
+ ano=2007,
+ chave=localizacao[x]["chave"],
+ valor=localizacao[x]["valor"],
+ dicionario=localizacao[x]["dicionario"],
+ skiprows=localizacao[x]["skiprows"],
+ )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py
new file mode 100644
index 00000000..dc546fbf
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py
@@ -0,0 +1,544 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+
+INPUT = os.path.join(os.getcwd(), "input")
+OUTPUT = os.path.join(os.getcwd(), "output")
+
+# os.makedirs(INPUT, exist_ok=True)
+# os.makedirs(OUTPUT, exist_ok=True)
+
+RENAMES_ETAPA_ENSINO_SERIE = {  # per sheet label: raw sheet header -> "uf", "id_municipio" or a "<localizacao>_<rede>" label
+    "Educacao Basica": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Educacao Infantil": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Educacao Infantil - Creche": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+        ####
+        # For 2014
+        ####
+        # "Unnamed: 1": "uf",
+        # "Unnamed: 3": "id_municipio",
+        # "Pública ": "Dependência Administrativa_Pública",
+        # "Federal": "Dependência Administrativa_Federal",
+        # "Estadual": "Dependência Administrativa_Estadual",
+        # "Municipal": "Dependência Administrativa_Municipal",
+        # "Privada": "Dependência Administrativa_Privada",
+        # "Pública": "Urbana_Pública",
+        # "Federal.1": "Urbana_Federal",
+        # "Estadual.1": "Urbana_Estadual",
+        # "Municipal.1": "Urbana_Municipal",
+        # "Privada.1": "Urbana_Privada",
+        # "Pública.1": "Rural_Pública",
+        # "Federal.2": "Rural_Federal",
+        # "Estadual.2": "Rural_Estadual",
+        # "Municipal.2": "Rural_Municipal",
+        # "Privada.2": "Rural_Privada",
+    },
+    "Educacao Infantil - Pré-Escola": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Ensino Fundamental": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Ensino Fundamental - Anos Iniciais": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Ensino Fundamental - Anos Finais": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Ensino Médio": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "Educacao Profissional": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+    "EJA": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+        ####
+        # For 2014
+        ####
+        # "Unnamed: 1": "uf",
+        # "Unnamed: 3": "id_municipio",
+        # "Publica ": "Dependência Administrativa_Pública",
+        # "Federal": "Dependência Administrativa_Federal",
+        # "Estadual": "Dependência Administrativa_Estadual",
+        # "Municipal": "Dependência Administrativa_Municipal",
+        # "Privada": "Dependência Administrativa_Privada",
+        # "Pública": "Urbana_Pública",
+        # "Federal.1": "Urbana_Federal",
+        # "Estadual.1": "Urbana_Estadual",
+        # "Municipal.1": "Urbana_Municipal",
+        # "Privada.1": "Urbana_Privada",
+        # "Pública.1": "Rural_Pública",
+        # "Federal.2": "Rural_Federal",
+        # "Estadual.2": "Rural_Estadual",
+        # "Municipal.2": "Rural_Municipal",
+        # "Privada.2": "Rural_Privada",
+    },
+    "Educacao Especial - Classes Comuns": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+        #####
+        # For 2014
+        ####
+        # "Unnamed: 1": "uf",
+        # "Unnamed: 3": "id_municipio",
+        # "Pública ": "Dependência Administrativa_Pública",
+        # "Federal": "Dependência Administrativa_Federal",
+        # "Estadual": "Dependência Administrativa_Estadual",
+        # "Municipal": "Dependência Administrativa_Municipal",
+        # "Privada": "Dependência Administrativa_Privada",
+        # "Pública": "Urbana_Pública",
+        # "Federal.1": "Urbana_Federal",
+        # "Estadual.1": "Urbana_Estadual",
+        # "Municipal.1": "Urbana_Municipal",
+        # "Privada.1": "Urbana_Privada",
+        # "Pública.1": "Rural_Pública",
+        # "Federal.2": "Rural_Federal",
+        # "Estadual.2": "Rural_Estadual",
+        # "Municipal.2": "Rural_Municipal",
+        # "Privada.2": "Rural_Privada",
+    },
+    "Educacao Especial - Classes Exclusivas": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Pública": "Dependência Administrativa_Pública",
+        "Federal": "Dependência Administrativa_Federal",
+        "Estadual": "Dependência Administrativa_Estadual",
+        "Municipal": "Dependência Administrativa_Municipal",
+        "Privada": "Dependência Administrativa_Privada",
+        "Pública.1": "Urbana_Pública",
+        "Federal.1": "Urbana_Federal",
+        "Estadual.1": "Urbana_Estadual",
+        "Municipal.1": "Urbana_Municipal",
+        "Privada.1": "Urbana_Privada",
+        "Pública.2": "Rural_Pública",
+        "Federal.2": "Rural_Federal",
+        "Estadual.2": "Rural_Estadual",
+        "Municipal.2": "Rural_Municipal",
+        "Privada.2": "Rural_Privada",
+    },
+}
+
+
+localizacao = {  # per-stage keyword arguments for read_sheet: rename mapping, sheet name, label, rows to skip, output table
+    "educacao_basica": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"],
+        "chave": "2.2",
+        "valor": "Educacao Basica",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "educacao_infantil": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil"],
+        "chave": "2.6",  # For the year 2010
+        #"chave": "2.7",
+        "valor": "Educacao Infantil",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "ensino_infantil_creche": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"],
+        "chave": "Creche 2.7",  # For the year 2010
+        #"chave": "Creche 2.8",
+        "valor": "Educacao Infantil - Creche",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "educacao_infantil_pre_escola": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"],
+        "chave": "Pré-Escola 2.10",  # For the year 2010
+        #"chave": "Pré-Escola 2.12",
+        "valor": "Educacao Infantil - Pré-Escola",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "ensino_fundamental": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"],
+        #"chave": "2.17",
+        "chave": "2.14",  # For the year 2010
+        "valor": "Ensino Fundamental",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "ensino_fundamental_anos_iniciais": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"],
+        #"chave": "Anos Iniciais 2.21",
+        "chave": "Anos Iniciais 2.17",  # For the year 2010
+        "valor": "Ensino Fundamental - Anos Iniciais",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "ensino_fundamental_anos_finais": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"],
+        #"chave": "Anos Finais 2.25",
+        "chave": "Anos Finais 2.20",  # For the year 2010
+        "valor": "Ensino Fundamental - Anos Finais",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "ensino_medio": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"],
+        #"chave": "Ensino Médio 2.29",
+        "chave": "Ensino Médio 2.23",  # For the year 2010
+        "valor": "Ensino Médio",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "educacao_profissional": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"],
+        #"chave": "2.34",
+        "chave": "2.27",  # For the year 2010
+        "valor": "Educacao Profissional",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "EJA": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"],
+        #"chave": "2.39",
+        "chave": "2.31",  # For the year 2010
+        "valor": "EJA",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "educacao_especial_classes_comuns": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Comuns"],
+        #"chave": "2.45",
+        "chave": "2.36",  # For the year 2010
+        "valor": "Educacao Especial - Classes Comuns",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+    "educacao_especial_classes_exclusivas": {
+        "dicionario": RENAMES_ETAPA_ENSINO_SERIE[
+            "Educacao Especial - Classes Exclusivas"
+        ],
+        #"chave": "2.51",
+        #"chave": "2.50", # For the year 2011
+        "chave": "2.40",  # For the year 2010
+        "valor": "Educacao Especial - Classes Exclusivas",
+        "skiprows": 8,
+        "table": "docente_localizacao",
+    },
+}
+
+
+def read_sheet(
+ table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
+) -> pd.DataFrame:
+ print("Tratando dados de", valor, ano)
+ path_excel = os.path.join(
+ INPUT,
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
+ f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+ )
+ df = pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=chave,
+ )
+
+ sheets_etapa_ensino_serie = {chave: valor}
+
+ df_localizacao = {
+ name: pd.read_excel(
+ path_excel,
+ skiprows=skiprows,
+ sheet_name=sheet_name,
+ )
+ for sheet_name, name in sheets_etapa_ensino_serie.items()
+ }
+
+ dataframes = {}
+ for table_name, columns in df_localizacao.items():
+ df = pd.DataFrame(columns)
+ dataframes[table_name] = df
+
+ print(df.columns)
+
+ def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:
+ cols_drop = [
+ col
+ for col in df.columns
+ if col.startswith("Unnamed") or col.startswith("Total")
+ ]
+
+ return df.drop(columns=cols_drop)
+
+ dfs_localizacao = {
+ name: drop_unused_columns(df.rename(columns=dicionario, errors="raise"))
+ for name, df in df_localizacao.items()
+ }
+
+ df_localizacao = pd.concat(
+ [
+ df.pipe(
+ lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),]
+ )
+ .pipe(
+ lambda d: pd.melt(
+ d,
+ id_vars=["id_municipio", "uf"],
+ value_vars=d.columns.difference(
+ ["id_municipio", "uf"]
+ ).tolist(), # Convert to list
+ var_name="localizacao",
+ value_name="quantidade_docente",
+ )
+ )
+ .assign(tipo_classe=tipo_classe)
+ for tipo_classe, df in dfs_localizacao.items()
+ ]
+ )
+
+ bd_dir = bd.read_sql(
+ "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+ billing_project_id="basedosdados",
+ reauth=False,
+ )
+
+ df_localizacao["uf"] = (
+ df_localizacao["uf"]
+ .apply(lambda uf: uf.strip())
+ .replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore
+ )
+
+ df_localizacao = df_localizacao.rename(columns={"uf": "sigla_uf"}, errors="raise")
+
+ df_localizacao["rede"] = df_localizacao["localizacao"].apply(lambda v: v.split("_")[-1])
+
+ df_localizacao["localizacao"] = df_localizacao["localizacao"].apply(
+ lambda v: v.split("_")[0]
+ )
+ df_localizacao["quantidade_docente"] = df_localizacao["quantidade_docente"].astype(int)
+
+ df_localizacao = df_localizacao[
+ [
+ "sigla_uf",
+ "id_municipio",
+ "tipo_classe",
+ "rede",
+ "localizacao",
+ "quantidade_docente",
+ ]
+ ]
+
+ print("Particionando dados")
+ for sigla_uf, df in df_localizacao.groupby("sigla_uf"):
+ path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+ if not os.path.exists(path):
+ os.makedirs(path, exist_ok=True)
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="w"
+ )
+ else:
+ df.drop(columns=["sigla_uf"]).to_csv(
+ os.path.join(path, "data.csv"), index=False, mode="a", header=False
+ )
+if __name__ == "__main__":
+ lista = [
+ "educacao_basica",
+ "educacao_infantil",
+ "ensino_infantil_creche",
+ "educacao_infantil_pre_escola",
+ "ensino_fundamental",
+ "ensino_fundamental_anos_iniciais",
+ "ensino_fundamental_anos_finais",
+ "ensino_medio",
+ "educacao_profissional",
+ "EJA",
+ "educacao_especial_classes_comuns",
+ "educacao_especial_classes_exclusivas"
+ ]
+
+ for x in lista:
+ read_sheet(
+ table=localizacao[x]["table"],
+ ano=2007,
+ chave=localizacao[x]["chave"],
+ valor=localizacao[x]["valor"],
+ dicionario=localizacao[x]["dicionario"],
+ skiprows=localizacao[x]["skiprows"],
+ )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py
new file mode 100644
index 00000000..3e8e50a4
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py
@@ -0,0 +1,586 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+
+INPUT = os.path.join(os.getcwd(), "input")
+OUTPUT = os.path.join(os.getcwd(), "output")
+
+# os.makedirs(INPUT, exist_ok=True)
+# os.makedirs(OUTPUT, exist_ok=True)
+
+#####
+# For years before 2011
+#####
+
+# RENAMES_CONTRATO = {
+# "Educacao Basica": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Educacao Infantil - Creche": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Educacao Infantil - Pré-Escola": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Ensino Fundamental": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Ensino Fundamental - Anos Iniciais": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Ensino Fundamental - Anos Finais": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Ensino Médio": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Educacao Profissional": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "EJA": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Educacao Especial - Classes Comuns": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# "Educacao Especial - Classes Exclusivas": {
+# "Unnamed: 1": "uf",
+# "Unnamed: 3": "id_municipio",
+# "Federal": "Concursado_Federal",
+# "Estadual": "Concursado_Estadual",
+# "Municipal": "Concursado_Municipal",
+# "Federal.1": "Contrato Temporário_Federal",
+# "Estadual.1": "Contrato Temporário_Estadual",
+# "Municipal.1": "Contrato Temporário_Municipal",
+# "Federal.2": "Contrato Terceirizado_Federal",
+# "Estadual.2": "Contrato Terceirizado_Estadual",
+# "Municipal.2": "Contrato Terceirizado_Municipal",
+# "Federal.3": "Contrato CLT_Federal",
+# "Estadual.3": "Contrato CLT_Estadual",
+# "Municipal.3": "Contrato CLT_Municipal",
+# },
+# }
+
+RENAMES_CONTRATO = {  # For years before 2011. NOTE(review): the commented-out variant above (with the extra "Contrato CLT" columns) carries the same label; confirm which years each mapping applies to
+    "Educacao Basica": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Educacao Infantil - Creche": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Educacao Infantil - Pré-Escola": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Ensino Fundamental": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Ensino Fundamental - Anos Iniciais": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Ensino Fundamental - Anos Finais": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Ensino Médio": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Educacao Profissional": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "EJA": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Educacao Especial - Classes Comuns": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+    "Educacao Especial - Classes Exclusivas": {
+        "Unnamed: 1": "uf",
+        "Unnamed: 3": "id_municipio",
+        "Federal": "Concursado_Federal",
+        "Estadual": "Concursado_Estadual",
+        "Municipal": "Concursado_Municipal",
+        "Federal.1": "Contrato Temporário_Federal",
+        "Estadual.1": "Contrato Temporário_Estadual",
+        "Municipal.1": "Contrato Temporário_Municipal",
+        "Federal.2": "Contrato Terceirizado_Federal",
+        "Estadual.2": "Contrato Terceirizado_Estadual",
+        "Municipal.2": "Contrato Terceirizado_Municipal",
+
+    },
+}
+
+
+regime_contrato = {  # per-stage settings whose keys mirror read_sheet's parameters: rename mapping, sheet name, label, rows to skip, output table
+    "educacao_basica": {
+        "dicionario": RENAMES_CONTRATO["Educacao Basica"],
+        "chave": "2.5",
+        "valor": "Educacao Basica",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "ensino_infantil_creche": {
+        "dicionario": RENAMES_CONTRATO["Educacao Infantil - Creche"],
+        "chave": "2.11",
+        "valor": "Educacao Infantil - Creche",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "educacao_infantil_pre_escola": {
+        "dicionario": RENAMES_CONTRATO["Educacao Infantil - Pré-Escola"],
+        "chave": "2.15",
+        "valor": "Educacao Infantil - Pré-Escola",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "ensino_fundamental": {
+        "dicionario": RENAMES_CONTRATO["Ensino Fundamental"],
+        "chave": "2.20",
+        "valor": "Ensino Fundamental",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "ensino_fundamental_anos_iniciais": {
+        "dicionario": RENAMES_CONTRATO["Ensino Fundamental - Anos Iniciais"],
+        "chave": "2.24",
+        "valor": "Ensino Fundamental - Anos Iniciais",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "ensino_fundamental_anos_finais": {
+        "dicionario": RENAMES_CONTRATO["Ensino Fundamental - Anos Finais"],
+        "chave": "2.28",
+        "valor": "Ensino Fundamental - Anos Finais",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "ensino_medio": {
+        "dicionario": RENAMES_CONTRATO["Ensino Médio"],
+        "chave": "2.32",
+        "valor": "Ensino Médio",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "educacao_profissional": {
+        "dicionario": RENAMES_CONTRATO["Educacao Profissional"],
+        "chave": "2.37",
+        "valor": "Educacao Profissional",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "EJA": {
+        "dicionario": RENAMES_CONTRATO["EJA"],
+        "chave": "2.42",
+        "valor": "EJA",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        "table": "docente_regime_contrato",
+    },
+    "educacao_especial_classes_comuns": {
+        "dicionario": RENAMES_CONTRATO["Educacao Especial - Classes Comuns"],
+        # "chave": "2.49",
+        "chave": "2.48",  # For the year 2010
+        "valor": "Educacao Especial - Classes Comuns",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        #"skiprows": 10, # For the year 2011
+        "table": "docente_regime_contrato",
+    },
+    "educacao_especial_classes_exclusivas": {
+        "dicionario": RENAMES_CONTRATO["Educacao Especial - Classes Exclusivas"],
+        # "chave": "2.55",
+        "chave": "2.53",  # For the year 2021
+        "valor": "Educacao Especial - Classes Exclusivas",
+        # "skiprows": 8,
+        "skiprows": 9,  # For the year 2021
+        #"skiprows": 10, # For the year 2011
+        "table": "docente_regime_contrato",
+    },
+}
+
+
+def read_sheet(
+    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows
+) -> pd.DataFrame:
+    """Reshape one workbook sheet to long format and write it partitioned by UF.
+
+    Reads sheet ``chave`` of the ``ano`` workbook under ``INPUT``, renames its
+    columns with ``dicionario``, melts the "<regime>_<rede>" columns into
+    ``regime_contrato`` / ``rede`` / ``quantidade_docente`` rows and writes one
+    ``data.csv`` per UF under ``OUTPUT/<table>/ano=<ano>/sigla_uf=<UF>/``.
+
+    Args:
+        table: Output folder name under ``OUTPUT``.
+        ano: Year of the workbook; used in the input path and in the partition.
+        chave: Excel sheet name to read.
+        valor: Label stored in the ``tipo_classe`` column (also logged).
+        dicionario: Mapping from raw sheet headers to output column names;
+            every key must be a column of the sheet.
+        skiprows: Forwarded to ``pandas.read_excel`` as ``skiprows``.
+
+    Returns:
+        The long-format frame for this sheet (all UFs, with ``sigla_uf``).
+
+    Raises:
+        KeyError: If a key of ``dicionario`` is not a column of the sheet.
+        ValueError: If ``quantidade_docente`` cannot be cast to ``int``.
+
+    Note:
+        When a partition's ``data.csv`` already exists the rows are appended,
+        so processing the same sheet and year twice duplicates its rows.
+    """
+    print("Tratando dados de", valor, ano)
+    # NOTE(review): the "ç╞o" sequence looks like a mis-decoded "ção"; it is
+    # kept as-is because it must match the extracted folder name on disk.
+    path_excel = os.path.join(
+        INPUT,
+        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
+        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
+    )
+
+    # Load the sheet exactly once (it used to be parsed twice per call).
+    df_sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
+    print(df_sheet.columns)
+
+    df_sheet = df_sheet.rename(columns=dicionario, errors="raise")
+
+    # Columns left unnamed by pandas and the sheet's "Total" columns are not
+    # part of the output.
+    cols_drop = [
+        col
+        for col in df_sheet.columns
+        if str(col).startswith(("Unnamed", "Total"))
+    ]
+    df_sheet = df_sheet.drop(columns=cols_drop)
+
+    # Keep only municipality rows: the id is neither missing nor a lone blank.
+    is_municipio = df_sheet["id_municipio"].notna() & (
+        df_sheet["id_municipio"] != " "
+    )
+    df_sheet = df_sheet.loc[is_municipio]
+
+    id_cols = ["id_municipio", "uf"]
+    df_regime_contrato = pd.melt(
+        df_sheet,
+        id_vars=id_cols,
+        value_vars=df_sheet.columns.difference(id_cols).tolist(),
+        var_name="regime_contrato",
+        value_name="quantidade_docente",
+    ).assign(tipo_classe=valor)
+
+    # Translate full state names into their two-letter codes.
+    bd_dir = bd.read_sql(
+        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+        billing_project_id="basedosdados",
+        reauth=False,
+    )
+    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))
+
+    df_regime_contrato["uf"] = (
+        df_regime_contrato["uf"].str.strip().replace(nome_para_sigla)
+    )
+    df_regime_contrato = df_regime_contrato.rename(
+        columns={"uf": "sigla_uf"}, errors="raise"
+    )
+
+    # Melted labels look like "<regime>_<rede>"; split them into two columns.
+    partes = df_regime_contrato["regime_contrato"].str.split("_")
+    df_regime_contrato["rede"] = partes.str[-1]
+    df_regime_contrato["regime_contrato"] = partes.str[0]
+
+    df_regime_contrato["quantidade_docente"] = df_regime_contrato[
+        "quantidade_docente"
+    ].astype(int)
+
+    print("Particionando dados")
+    for sigla_uf, df_uf in df_regime_contrato.groupby("sigla_uf"):
+        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+        os.makedirs(path, exist_ok=True)
+        csv_path = os.path.join(path, "data.csv")
+        # Decide on the file, not the folder: an existing but empty folder
+        # must still get a header row.
+        primeira_escrita = not os.path.exists(csv_path)
+        df_uf.drop(columns=["sigla_uf"]).to_csv(
+            csv_path,
+            index=False,
+            mode="w" if primeira_escrita else "a",
+            header=primeira_escrita,
+        )
+
+    return df_regime_contrato
+
+if __name__ == "__main__":
+    # Sheets to process, in order; each name is a key of ``regime_contrato``.
+    sheet_keys = (
+        "educacao_basica",
+        "ensino_infantil_creche",
+        "educacao_infantil_pre_escola",
+        "ensino_fundamental",
+        "ensino_fundamental_anos_iniciais",
+        "ensino_fundamental_anos_finais",
+        "ensino_medio",
+        "educacao_profissional",
+        "EJA",
+        "educacao_especial_classes_comuns",
+        "educacao_especial_classes_exclusivas",
+    )
+
+    # The year is fixed below; to process several editions, wrap the call in
+    # a loop over years instead (e.g. ``for ano in range(2012, 2019)``).
+    for sheet_key in sheet_keys:
+        settings = regime_contrato[sheet_key]
+        read_sheet(
+            table=settings["table"],
+            ano=2010,
+            chave=settings["chave"],
+            valor=settings["valor"],
+            dicionario=settings["dicionario"],
+            skiprows=settings["skiprows"],
+        )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docentes_etapa_ensino.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docentes_etapa_ensino.py
new file mode 100644
index 00000000..0cfbb7e3
--- /dev/null
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docentes_etapa_ensino.py
@@ -0,0 +1,489 @@
+import os
+import zipfile
+import pandas as pd
+import basedosdados as bd
+import numpy as np
+
+INPUT = os.path.join(os.getcwd(), "input")
+OUTPUT = os.path.join(os.getcwd(), "output")
+
+# os.makedirs(INPUT, exist_ok=True)
+# os.makedirs(OUTPUT, exist_ok=True)
+
+# Column rename maps, one per sheet label. Keys are the headers as pandas reads
+# them from the workbook (repeated headers get ".1", ".2", ... suffixes);
+# values are the output column names / etapa_ensino labels. Entries are looked
+# up by the etapa_ensino settings dict and passed to read_sheet() as
+# ``dicionario``.
+RENAMES_ETAPA_ENSINO_SERIE = {
+ "Educacao Basica": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Creche": "Educação Infantil - Creche",
+ "Pré-Escola11": "Educação Infantil - Pré Escola",
+ # Renamed to an empty label; read_sheet() discards rows whose etapa_ensino
+ # is empty after melting.
+ "Total12": "",
+ "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
+ "Anos Finais14": "Ensino Fundamental - Anos Finais",
+ "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
+ "Ensino Médio Normal/Magistério": "Ensino Médio - Normal/Magistério",
+ "Curso Técnico Integrado (Ensino Médio Integrado)": "Ensino Médio - Curso Técnico Integrado",
+ "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
+ "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
+ "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
+ "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
+ "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
+ "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
+ "Ensino Fundamental22": "EJA - Ensino Fundamental",
+ "Ensino Médio23": "EJA - Ensino Médio",
+ "Classes Comuns25": "Educação Especial - Classes Comuns",
+ "Classes Exclusivas26": "Educação Especial - Classes Exclusivas",
+ #####
+ # Alternative keys without the numeric suffix. The original note says they
+ # apply to values "prior to 2016 and 2013" — TODO confirm the exact years.
+ ####
+ # "Classes Comuns": "Educação Especial - Classes Comuns",
+ # "Classes Exclusivas": "Educação Especial - Classes Exclusivas",
+ },
+ "Educacao Infantil": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Pública": "Creche - Pública",
+ "Federal": "Creche - Federal",
+ "Estadual": "Creche - Estadual",
+ "Municipal": "Creche - Municipal",
+ "Privada": "Creche - Privada",
+ "Pública.1": "Pré-Escola - Pública",
+ "Federal.1": "Pré-Escola - Federal",
+ "Estadual.1": "Pré-Escola - Estadual",
+ "Municipal.1": "Pré-Escola - Municipal",
+ "Privada.1": "Pré-Escola - Privada",
+ },
+ "Ensino Fundamental": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Pública": "Anos Iniciais - Pública",
+ "Federal": "Anos Iniciais - Federal",
+ "Estadual": "Anos Iniciais - Estadual",
+ "Municipal": "Anos Iniciais - Municipal",
+ "Privada": "Anos Iniciais - Privada",
+ "Pública.1": "Anos Finais - Pública",
+ "Federal.1": "Anos Finais - Federal",
+ "Estadual.1": "Anos Finais - Estadual",
+ "Municipal.1": "Anos Finais - Municipal",
+ "Privada.1": "Anos Finais - Privada",
+ "Pública.2": "Turmas Multi - Pública",
+ "Federal.2": "Turmas Multi - Federal",
+ "Estadual.2": "Turmas Multi - Estadual",
+ "Municipal.2": "Turmas Multi - Municipal",
+ "Privada.2": "Turmas Multi - Privada",
+ },
+ # Two mappings follow for this key. The first is commented out and kept for
+ # reference: it has ten column groups (it includes "EJA Ensino Fundamental
+ # Projovem Urbano"). The second, active one has nine.
+ "Educacao Profissional": #{
+
+ # "Unnamed: 1": "uf",
+ # "Unnamed: 3": "id_municipio",
+ # "Pública": "Curso Técnico Integrado (Ensino Médio Integrado) - Pública",
+ # "Federal": "Curso Técnico Integrado (Ensino Médio Integrado) - Federal",
+ # "Estadual": "Curso Técnico Integrado (Ensino Médio Integrado) - Estadual",
+ # "Municipal": "Curso Técnico Integrado (Ensino Médio Integrado) - Municipal",
+ # "Privada": "Curso Técnico Integrado (Ensino Médio Integrado) - Privada",
+ # "Pública.1": "Ensino Médio Normal/Magistério - Pública",
+ # "Federal.1": "Ensino Médio Normal/Magistério - Federal",
+ # "Estadual.1": "Ensino Médio Normal/Magistério - Estadual",
+ # "Municipal.1": "Ensino Médio Normal/Magistério - Municipal",
+ # "Privada.1": "Ensino Médio Normal/Magistério - Privada",
+ # "Pública.2": "Curso Técnico Concomitante - Pública",
+ # "Federal.2": "Curso Técnico Concomitante - Federal",
+ # "Estadual.2": "Curso Técnico Concomitante - Estadual",
+ # "Municipal.2": "Curso Técnico Concomitante - Municipal",
+ # "Privada.2": "Curso Técnico Concomitante - Privada",
+ # "Pública.3": "Curso Técnico Subsequente - Pública",
+ # "Federal.3": "Curso Técnico Subsequente - Federal",
+ # "Estadual.3": "Curso Técnico Subsequente - Estadual",
+ # "Municipal.3": "Curso Técnico Subsequente - Municipal",
+ # "Privada.3": "Curso Técnico Subsequente - Privada",
+ # "Pública.4": "Curso Técnico Misto (Concomitante e Subsequente) - Pública",
+ # "Federal.4": "Curso Técnico Misto (Concomitante e Subsequente) - Federal",
+ # "Estadual.4": "Curso Técnico Misto (Concomitante e Subsequente) - Estadual",
+ # "Municipal.4": "Curso Técnico Misto (Concomitante e Subsequente) - Municipal",
+ # "Privada.4": "Curso Técnico Misto (Concomitante e Subsequente) - Privada",
+ # "Pública.5": "Curso Técnico Integrado a EJA - Pública",
+ # "Federal.5": "Curso Técnico Integrado a EJA - Federal",
+ # "Estadual.5": "Curso Técnico Integrado a EJA - Estadual",
+ # "Municipal.5": "Curso Técnico Integrado a EJA - Municipal",
+ # "Privada.5": "Curso Técnico Integrado a EJA - Privada",
+ # "Pública.6": "EJA Ensino Fundamental Projovem Urbano - Pública",
+ # "Federal.6": "EJA Ensino Fundamental Projovem Urbano - Federal",
+ # "Estadual.6": "EJA Ensino Fundamental Projovem Urbano - Estadual",
+ # "Municipal.6": "EJA Ensino Fundamental Projovem Urbano - Municipal",
+ # "Privada.6": "EJA Ensino Fundamental Projovem Urbano - Privada",
+ # "Pública.7": "Curso FIC Concomitante - Pública",
+ # "Federal.7": "Curso FIC Concomitante - Federal",
+ # "Estadual.7": "Curso FIC Concomitante - Estadual",
+ # "Municipal.7": "Curso FIC Concomitante - Municipal",
+ # "Privada.7": "Curso FIC Concomitante - Privada",
+ # "Pública.8": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Pública",
+ # "Federal.8": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Federal",
+ # "Estadual.8": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Estadual",
+ # "Municipal.8": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Municipal",
+ # "Privada.8": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Privada",
+ # "Pública.9": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Pública",
+ # "Federal.9": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Federal",
+ # "Estadual.9": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Estadual",
+ # "Municipal.9": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Municipal",
+ # "Privada.9": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Privada",
+ #},
+ #####
+ # Active mapping. The original note read "Valores antes depois de 2018"
+ # ("values before after 2018"), so which side of 2018 it covers is unclear
+ # — TODO confirm.
+ ####
+ {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Pública": "Curso Técnico Integrado (Ensino Médio Integrado) - Pública",
+ "Federal": "Curso Técnico Integrado (Ensino Médio Integrado) - Federal",
+ "Estadual": "Curso Técnico Integrado (Ensino Médio Integrado) - Estadual",
+ "Municipal": "Curso Técnico Integrado (Ensino Médio Integrado) - Municipal",
+ "Privada": "Curso Técnico Integrado (Ensino Médio Integrado) - Privada",
+ "Pública.1": "Ensino Médio Normal/Magistério - Pública",
+ "Federal.1": "Ensino Médio Normal/Magistério - Federal",
+ "Estadual.1": "Ensino Médio Normal/Magistério - Estadual",
+ "Municipal.1": "Ensino Médio Normal/Magistério - Municipal",
+ "Privada.1": "Ensino Médio Normal/Magistério - Privada",
+ "Pública.2": "Curso Técnico Concomitante - Pública",
+ "Federal.2": "Curso Técnico Concomitante - Federal",
+ "Estadual.2": "Curso Técnico Concomitante - Estadual",
+ "Municipal.2": "Curso Técnico Concomitante - Municipal",
+ "Privada.2": "Curso Técnico Concomitante - Privada",
+ "Pública.3": "Curso Técnico Subsequente - Pública",
+ "Federal.3": "Curso Técnico Subsequente - Federal",
+ "Estadual.3": "Curso Técnico Subsequente - Estadual",
+ "Municipal.3": "Curso Técnico Subsequente - Municipal",
+ "Privada.3": "Curso Técnico Subsequente - Privada",
+ "Pública.4": "Curso Técnico Misto (Concomitante e Subsequente) - Pública",
+ "Federal.4": "Curso Técnico Misto (Concomitante e Subsequente) - Federal",
+ "Estadual.4": "Curso Técnico Misto (Concomitante e Subsequente) - Estadual",
+ "Municipal.4": "Curso Técnico Misto (Concomitante e Subsequente) - Municipal",
+ "Privada.4": "Curso Técnico Misto (Concomitante e Subsequente) - Privada",
+ "Pública.5": "Curso Técnico Integrado a EJA - Pública",
+ "Federal.5": "Curso Técnico Integrado a EJA - Federal",
+ "Estadual.5": "Curso Técnico Integrado a EJA - Estadual",
+ "Municipal.5": "Curso Técnico Integrado a EJA - Municipal",
+ "Privada.5": "Curso Técnico Integrado a EJA - Privada",
+ "Pública.6": "Curso FIC Concomitante - Pública",
+ "Federal.6": "Curso FIC Concomitante - Federal",
+ "Estadual.6": "Curso FIC Concomitante - Estadual",
+ "Municipal.6": "Curso FIC Concomitante - Municipal",
+ "Privada.6": "Curso FIC Concomitante - Privada",
+ "Pública.7": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Pública",
+ "Federal.7": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Federal",
+ "Estadual.7": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Estadual",
+ "Municipal.7": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Municipal",
+ "Privada.7": "Curso FIC Integrado na Modalidade EJA de Nível Fundamental - Privada",
+ "Pública.8": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Pública",
+ "Federal.8": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Federal",
+ "Estadual.8": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Estadual",
+ "Municipal.8": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Municipal",
+ "Privada.8": "Curso FIC Integrado na Modalidade EJA de Nível Médio - Privada"},
+ "EJA": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Pública": "Ensino Fundamental - Pública",
+ "Federal": "Ensino Fundamental - Federal",
+ "Estadual": "Ensino Fundamental - Estadual",
+ "Municipal": "Ensino Fundamental - Municipal",
+ "Privada": "Ensino Fundamental - Privada",
+ "Pública.1": "Ensino Médio - Pública",
+ "Federal.1": "Ensino Médio - Federal",
+ "Estadual.1": "Ensino Médio - Estadual",
+ "Municipal.1": "Ensino Médio - Municipal",
+ "Privada.1": "Ensino Médio - Privada",
+ },
+ "Educacao Especial": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Creche": "Educação Infantil - Creche",
+ "Pré-Escola11": "Educação Infantil - Pré Escola",
+ "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
+ "Anos Finais14": "Ensino Fundamental - Anos Finais",
+ "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
+ "Ensino Médio Normal/Magistério": "Ensino Médio - Normal/Magistério",
+ "Curso Técnico Integrado (Ensino Médio Integrado)": "Ensino Médio - Curso Técnico Integrado",
+ "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
+ "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
+ "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
+ "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
+ "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
+ "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
+ "Ensino Fundamental22": "EJA - Ensino Fundamental",
+ "Ensino Médio23": "EJA - Ensino Médio",
+ "Classes Comuns25": "Educação Especial - Classes Comuns",
+ "Classes Exclusivas26": "Educação Especial - Classes Exclusivas",
+ #####
+ # Alternative keys without the numeric suffix. The original note says they
+ # apply to values "prior to 2016 and 2013" — TODO confirm the exact years.
+ ####
+ # "Classes Comuns": "Educação Especial - Classes Comuns",
+ # "Classes Exclusivas": "Educação Especial - Classes Exclusivas",
+ },
+ "Educacao Especial - Classes Comuns": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Creche": "Educação Infantil - Creche",
+ "Pré-Escola11": "Educação Infantil - Pré Escola",
+ "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
+ "Anos Finais14": "Ensino Fundamental - Anos Finais",
+ "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
+ "Ensino Médio Normal/ Magistério": "Ensino Médio - Normal/Magistério",
+ "Ensino Médio Curso Técnico Integrado (Ensino Médio Integrado)": "Ensino Médio - Curso Técnico Integrado",
+ "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
+ "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
+ "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
+ "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
+ "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
+ "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
+ "Ensino Fundamental22": "EJA - Ensino Fundamental",
+ "Ensino Médio23": "EJA - Ensino Médio",
+ },
+ "Educacao Especial - Classes Exclusivas": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Creche": "Educação Infantil - Creche",
+ "Pré-Escola11": "Educação Infantil - Pré Escola",
+ "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
+ "Anos Finais14": "Ensino Fundamental - Anos Finais",
+ "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
+ "Ensino Médio Normal/ Magistério": "Ensino Médio - Normal/Magistério",
+ "Curso Técnico Integrado (Ensino Médio Integrado)": "Ensino Médio - Curso Técnico Integrado",
+ "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
+ "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
+ "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
+ "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
+ "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
+ "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
+ "Ensino Fundamental22": "EJA - Ensino Fundamental",
+ "Ensino Médio23": "EJA - Ensino Médio",
+ },
+ "Educacao Indigena": {
+ "Unnamed: 1": "uf",
+ "Unnamed: 3": "id_municipio",
+ "Creche": "Educação Infantil - Creche",
+ "Pré-Escola11": "Educação Infantil - Pré Escola",
+ "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
+ "Anos Finais14": "Ensino Fundamental - Anos Finais",
+ "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
+ "Ensino Médio Normal/Magistério": "Ensino Médio - Normal/Magistério",
+ "Curso Técnico Integrado (Ensino Médio Integrado)": "Ensino Médio - Curso Técnico Integrado",
+ "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
+ "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
+ "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
+ "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
+ "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
+ "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
+ "Ensino Fundamental22": "EJA - Ensino Fundamental",
+ "Ensino Médio23": "EJA - Ensino Médio",
+ "Classes Comuns25": "Educação Especial - Classes Comuns",
+ "Classes Exclusivas26": "Educação Especial - Classes Exclusivas",
+ }}
+
+
+# Per-sheet settings for the docente_etapa_ensino extraction, keyed by a short
+# slug. The __main__ block passes each entry's fields to read_sheet():
+# "dicionario" -> column rename map, "chave" -> Excel sheet name,
+# "valor" -> label stored in the tipo_classe column, "skiprows" -> rows
+# skipped before the header row, "table" -> output folder name.
+# Commented-out "chave" values are the sheet names used by older workbooks.
+etapa_ensino = {
+ "educacao_basica": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"],
+ "chave": "Educação Básica 2.1",
+ "valor": "Educacao Basica",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_infantil": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil"],
+ "chave": "Educação Infantil 2.6",
+ #"chave": "Educação Infantil 2.5", # In 2010 the sheet is 2.5
+ "valor": "Educacao Infantil",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "ensino_fundamental": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"],
+ "chave": "Ensino Fundamental 2.16",
+ #"chave": "Ensino Fundamental 2.13", # In 2010 the sheet is 2.13
+ "valor": "Ensino Fundamental",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_profissional": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"],
+ "chave": "Educação Profissional 2.33",
+ #"chave": "Educação Profissional 2.26", # In 2010 the sheet is 2.26
+ # NOTE(review): every other entry reuses its rename-map label here; this one
+ # says "Ensino" instead of "Educacao" — confirm the label is intended.
+ "valor": "Ensino Profissional",
+ "skiprows": 9,
+ "table": "docente_etapa_ensino",
+ },
+ "EJA": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"],
+ "chave": "EJA 2.38",
+ #"chave": "EJA 2.30", # In 2010 the sheet is 2.30
+ "valor": "EJA",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_especial": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial"],
+ "chave": "Educação Especial 2.43",
+ #"chave": "Educação Especial 2.34", # In 2010 the sheet is 2.34
+ "valor": "Educacao Especial",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_especial_classes_comuns": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Comuns"],
+ "chave": "Classes Comuns 2.44",
+ #"chave": "Classes Comuns 2.35", # In 2010 the sheet is 2.35
+ "valor": "Educacao Especial - Classes Comuns",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_especial_classes_exclusivas": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Exclusivas"],
+ "chave": "Classes Exclusivas 2.50",
+ #"chave": "Classes Exclusivas 2.49", # In 2011 the sheet is 2.49
+ #"chave" : "Classes Exclusivas 2.39", # In 2010 the sheet is 2.39
+ "valor": "Educacao Especial - Classes Exclusivas",
+ "skiprows": 8,
+ "table": "docente_etapa_ensino",
+ },
+ "educacao_indigena": {
+ "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Indigena"],
+ "chave": "Educação Indígena 2.56",
+ "valor": "Educacao Indigena",
+ "skiprows": 9,
+ "table": "docente_etapa_ensino",
+ },
+}
+
+
+def read_sheet(
+    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
+) -> pd.DataFrame:
+    """Reshape one workbook sheet to long format and write it partitioned by UF.
+
+    Reads sheet ``chave`` of the ``ano`` workbook under ``INPUT``, renames its
+    columns with ``dicionario``, melts the stage columns into
+    ``etapa_ensino`` / ``quantidade_docentes`` rows and writes one ``data.csv``
+    per UF under ``OUTPUT/<table>/ano=<ano>/sigla_uf=<UF>/``.
+
+    Args:
+        table: Output folder name under ``OUTPUT``.
+        ano: Year of the workbook; used in the input path and in the partition.
+        chave: Excel sheet name to read.
+        valor: Label stored in the ``tipo_classe`` column (also logged).
+        dicionario: Mapping from raw sheet headers to output labels; every key
+            must be a column of the sheet. Columns mapped to an empty string
+            are discarded after melting.
+        skiprows: Forwarded to ``pandas.read_excel`` as ``skiprows``.
+
+    Returns:
+        The long-format frame for this sheet (all UFs, with ``sigla_uf``).
+
+    Raises:
+        KeyError: If a key of ``dicionario`` is not a column of the sheet.
+        ValueError: If ``quantidade_docentes`` cannot be cast to ``int``.
+
+    Note:
+        When a partition's ``data.csv`` already exists the rows are appended,
+        so processing the same sheet and year twice duplicates its rows.
+    """
+    print("Tratando dados de", valor)
+    path_excel = os.path.join(
+        INPUT,
+        f"Sinopse_Estatistica_da_Educacao_Basica_{ano}",
+        f"Sinopse_Estatistica_da_Educacao_Basica_{ano}.xlsx",
+    )
+
+    # Load the sheet exactly once (it used to be parsed twice per call).
+    df_sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
+    print(df_sheet.columns)
+
+    df_sheet = df_sheet.rename(columns=dicionario, errors="raise")
+
+    # Columns left unnamed by pandas and any "Total" column that was not
+    # renamed are not part of the output.
+    cols_drop = [
+        col
+        for col in df_sheet.columns
+        if str(col).startswith(("Unnamed", "Total"))
+    ]
+    df_sheet = df_sheet.drop(columns=cols_drop)
+
+    # Keep only municipality rows: the id is neither missing nor a lone blank.
+    is_municipio = df_sheet["id_municipio"].notna() & (
+        df_sheet["id_municipio"] != " "
+    )
+    df_sheet = df_sheet.loc[is_municipio]
+
+    id_cols = ["id_municipio", "uf"]
+    df_etapa_ensino = pd.melt(
+        df_sheet,
+        id_vars=id_cols,
+        value_vars=df_sheet.columns.difference(id_cols).tolist(),
+        var_name="etapa_ensino",
+        value_name="quantidade_docentes",
+    ).assign(tipo_classe=valor)
+
+    # Columns renamed to "" carry no stage label: turn the blank into NaN and
+    # drop those rows. ``.copy()`` keeps the assignments below off a slice.
+    df_etapa_ensino["etapa_ensino"] = (
+        df_etapa_ensino["etapa_ensino"].str.strip().replace("", np.nan)
+    )
+    df_etapa_ensino = df_etapa_ensino.loc[
+        df_etapa_ensino["etapa_ensino"].notna()
+    ].copy()
+
+    df_etapa_ensino["quantidade_docentes"] = df_etapa_ensino[
+        "quantidade_docentes"
+    ].astype(int)
+
+    # Translate full state names into their two-letter codes.
+    bd_dir = bd.read_sql(
+        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
+        billing_project_id="basedosdados",
+        reauth=False,
+    )
+    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))
+
+    df_etapa_ensino["uf"] = df_etapa_ensino["uf"].str.strip().replace(nome_para_sigla)
+    df_etapa_ensino = df_etapa_ensino.rename(columns={"uf": "sigla_uf"}, errors="raise")
+
+    for sigla_uf, df_uf in df_etapa_ensino.groupby("sigla_uf"):
+        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
+        os.makedirs(path, exist_ok=True)
+        csv_path = os.path.join(path, "data.csv")
+        # Decide on the file, not the folder: an existing but empty folder
+        # must still get a header row.
+        primeira_escrita = not os.path.exists(csv_path)
+        df_uf.drop(columns=["sigla_uf"]).to_csv(
+            csv_path,
+            index=False,
+            mode="w" if primeira_escrita else "a",
+            header=primeira_escrita,
+        )
+
+    return df_etapa_ensino
+
+if __name__ == "__main__":
+    # Sheets to process, in order; each name is a key of ``etapa_ensino``.
+    # "educacao_indigena" is configured but intentionally left out of the run.
+    sheet_keys = (
+        "educacao_basica",
+        "educacao_infantil",
+        "ensino_fundamental",
+        "educacao_profissional",
+        "EJA",
+        "educacao_especial",
+        "educacao_especial_classes_comuns",
+        "educacao_especial_classes_exclusivas",
+    )
+
+    for sheet_key in sheet_keys:
+        settings = etapa_ensino[sheet_key]
+        read_sheet(
+            table=settings["table"],
+            ano=2021,
+            chave=settings["chave"],
+            valor=settings["valor"],
+            dicionario=settings["dicionario"],
+            skiprows=settings["skiprows"],
+        )
diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml b/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml
index 253a0460..93301eac 100644
--- a/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml
+++ b/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml
@@ -165,12 +165,18 @@ models:
description: Raça/Cor
- name: quantidade_matricula
description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe (Comuns ou Exclusivas) e Etapa de Ensino
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade
+ description: A base conta com o total de docentes por município, escolaridade
+ e rede
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - escolaridade
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -191,19 +197,25 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
- - name: etapa_ensino
- description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
- Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
- - name: quantidade_matricula
- description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe (Comuns ou Exclusivas) e faixa etária
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
+ - name: escolaridade
+ description: Escolaridade
+ - name: quantidade_docente
+ description: Número de Docentes
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato
+ description: A base conta com o total de docentes por município, regime de contrato
+ e rede
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - regime_contrato
+ - rede
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -224,18 +236,27 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
- - name: faixa_etaria
- description: Faixa etária
- - name: quantidade_matricula
- description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe, rede e localização
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
+ - name: rede
+ description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
+ - name: regime_contrato
+ description: Regime de contratação
+ - name: quantidade_docente
+ description: Número de Docentes
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo
+ description: A base conta com o total de docentes por município, faixa etária
+ e sexo
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - faixa_etaria
+ - sexo
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -256,20 +277,25 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
- - name: rede
- description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
- - name: localizacao
- description: Localização (e.g. Zona Urbana, Zona Rural)
- - name: quantidade_matricula
- description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe, sexo e raça/cor
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
+ - name: faixa_etaria
+ description: Faixa Etária dos docentes
+ - name: sexo
+ description: Sexo dos docentes
+ - name: quantidade_docente
+ description: Número de Docentes
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia
+ description: A base conta com o total de docentes por município e tipo de deficiência
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - deficiencia
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -290,20 +316,25 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
- - name: sexo
- description: Sexo
- - name: raca_cor
- description: Raça/Cor
- - name: quantidade_matricula
- description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe, rede e tempo de ensino (Integral ou Parcial)
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
+ - name: deficiencia
+ description: Tipo de deficiência, transtorno global do desenvolvimento ou
+ altas habilidades/superdotação
+ - name: quantidade_docente
+ description: Número de docentes
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_localizacao
+ description: A base conta com o total de docentes por município, rede e localização
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - rede
+ - localizacao
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -324,21 +355,26 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
- name: rede
description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada)
- - name: tempo_ensino
- description: Classificação em tempo integral e tempo parcial
- - name: quantidade_matricula
- description: Número de matrículas
- - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia
- description: Número de alunos matriculados na Educação Especial ao longo do tempo
- por tipo de classe e tipo de deficiência, transtorno global do desenvolvimento
- ou altas habilidades/superdotação
+ - name: localizacao
+ description: Localização (e.g. Zona Urbana, Zona Rural)
+ - name: quantidade_docente
+ description: Número de docentes
+ - name: br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino
+ description: A base conta com o total de docentes por município e etapa
+ de ensino
tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - ano
+ - id_municipio
+ - etapa_ensino
+ - tipo_classe
- not_null_proportion_multiple_columns:
- at_least: 0.05
+ at_least: 0.95
columns:
- name: ano
description: Ano
@@ -359,10 +395,10 @@ models:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: tipo_classe
- description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes
- Exclusivas/Especiais)
- - name: tipo_deficiêcia
- description: Tipo de deficiência, transtorno global do desenvolvimento ou
- altas habilidades/superdotação
- - name: quantidade_matricula
- description: Número de matrículas
+ description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou
+ Classes Exclusivas/Especiais)'
+ - name: etapa_ensino
+ description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos
+ Iniciais, Anos Finais, Ensino Fundamental - EJA, ...)
+ - name: quantidade_docente
+ description: Número de Docentes
diff --git a/models/br_me_rais/br_me_rais__dicionario.sql b/models/br_me_rais/br_me_rais__dicionario.sql
index 8c55fa33..99d08533 100644
--- a/models/br_me_rais/br_me_rais__dicionario.sql
+++ b/models/br_me_rais/br_me_rais__dicionario.sql
@@ -1,5 +1,4 @@
{{ config(alias="dicionario", schema="br_me_rais") }}
--- Dicionário da Rais
select
safe_cast(id_tabela as string) id_tabela,
safe_cast(nome_coluna as string) nome_coluna,
diff --git a/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql b/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql
index 8a0fedc7..09db0eee 100644
--- a/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql
+++ b/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql
@@ -2,7 +2,7 @@
config(
alias="microdados_estabelecimentos",
schema="br_me_rais",
- materialized="table",
+ materialized="incremental",
partition_by={
"field": "ano",
"data_type": "int64",
@@ -11,10 +11,11 @@
cluster_by=["sigla_uf"],
)
}}
+
select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
- safe_cast(id_municipio as string) id_municipio,
+ safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(quantidade_vinculos_ativos as int64) quantidade_vinculos_ativos,
safe_cast(quantidade_vinculos_clt as int64) quantidade_vinculos_clt,
safe_cast(
@@ -23,7 +24,7 @@ select
safe_cast(natureza as string) natureza_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(tamanho as string) tamanho_estabelecimento,
- safe_cast(tipo as string) tipo_estabelecimento,
+ safe_cast(regexp_replace(tipo, r'^0+', '') as string) as tipo_estabelecimento,
safe_cast(indicador_cei_vinculado as int64) indicador_cei_vinculado,
safe_cast(indicador_pat as int64) indicador_pat,
safe_cast(indicador_simples as string) indicador_simples,
@@ -32,22 +33,28 @@ select
safe_cast(cnae_1 as string) cnae_1,
safe_cast(cnae_2 as string) cnae_2,
safe_cast(cnae_2_subclasse as string) cnae_2_subclasse,
- cast(
- cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as string
- ) as subsetor_ibge,
+ safe_cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as subsetor_ibge,
safe_cast(subatividade_ibge as string) subatividade_ibge,
case
- when length(cep) = 7 then lpad(cep, 8, '0') else cast(cep as string)
+ when length(cep) = 7 then lpad(cep, 8, '0') else safe_cast(cep as string)
end as cep,
case
when bairros_sp = '????????????'
then null
- else cast(regexp_replace(bairros_sp, r'^0+', '') as string)
+ else trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string))
end as bairros_sp,
- cast(regexp_replace(distritos_sp, r'^0+', '') as string) as distritos_sp,
- cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) as bairros_fortaleza,
- nullif(cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') as bairros_rj,
- cast(
- regexp_replace(regioes_administrativas_df, r'^0+', '') as string
+ trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp,
+ trim(
+ safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string)
+ ) as bairros_fortaleza,
+ trim(
+ nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '')
+ ) as bairros_rj,
+ trim(
+ safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string)
) as regioes_administrativas_df
from `basedosdados-staging.br_me_rais_staging.microdados_estabelecimentos` as t
+{% if is_incremental() %}
+ where
+ safe_cast(ano as int64) > (select safe_cast(max(ano) as int64) from {{ this }})
+{% endif %}
diff --git a/models/br_me_rais/br_me_rais__microdados_vinculos.sql b/models/br_me_rais/br_me_rais__microdados_vinculos.sql
index 8b2a475b..fc11a07c 100644
--- a/models/br_me_rais/br_me_rais__microdados_vinculos.sql
+++ b/models/br_me_rais/br_me_rais__microdados_vinculos.sql
@@ -2,7 +2,7 @@
config(
alias="microdados_vinculos",
schema="br_me_rais",
- materialized="table",
+ materialized="incremental",
partition_by={
"field": "ano",
"data_type": "int64",
@@ -15,7 +15,7 @@
select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
- safe_cast(id_municipio as string) id_municipio,
+ safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(tipo_vinculo as string) tipo_vinculo,
safe_cast(vinculo_ativo_3112 as string) vinculo_ativo_3112,
safe_cast(tipo_admissao as string) tipo_admissao,
@@ -27,7 +27,7 @@ select
safe_cast(causa_desligamento_3 as string) causa_desligamento_3,
safe_cast(faixa_tempo_emprego as string) faixa_tempo_emprego,
safe_cast(faixa_horas_contratadas as string) faixa_horas_contratadas,
- safe_cast(tempo_emprego as float64) tempo_emprego,
+ round(safe_cast(tempo_emprego as float64), 2) tempo_emprego,
safe_cast(quantidade_horas_contratadas as int64) quantidade_horas_contratadas,
safe_cast(id_municipio_trabalho as string) id_municipio_trabalho,
safe_cast(quantidade_dias_afastamento as int64) quantidade_dias_afastamento,
@@ -37,10 +37,14 @@ select
indicador_trabalho_intermitente as string
) indicador_trabalho_intermitente,
safe_cast(faixa_remuneracao_media_sm as string) faixa_remuneracao_media_sm,
- safe_cast(valor_remuneracao_media_sm as float64) valor_remuneracao_media_sm,
+ round(
+ safe_cast(valor_remuneracao_media_sm as float64), 2
+ ) valor_remuneracao_media_sm,
safe_cast(valor_remuneracao_media as float64) valor_remuneracao_media,
safe_cast(faixa_remuneracao_dezembro_sm as string) faixa_remuneracao_dezembro_sm,
- safe_cast(valor_remuneracao_dezembro_sm as float64) valor_remuneracao_dezembro_sm,
+ round(
+ safe_cast(valor_remuneracao_dezembro_sm as float64), 2
+ ) valor_remuneracao_dezembro_sm,
safe_cast(valor_remuneracao_janeiro as float64) valor_remuneracao_janeiro,
safe_cast(valor_remuneracao_fevereiro as float64) valor_remuneracao_fevereiro,
safe_cast(valor_remuneracao_marco as float64) valor_remuneracao_marco,
@@ -82,19 +86,23 @@ select
then 'Não identificado'
when tipo_estabelecimento = 'CEI/CNO'
then 'CEI'
- else tipo_estabelecimento
+ else safe_cast(regexp_replace(tipo_estabelecimento, r'^0+', '') as string)
end as tipo_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(indicador_simples as string) indicador_simples,
- cast(cast(regexp_replace(bairros_sp, r'^0+', '') as int64) as string) as bairros_sp,
- cast(
- cast(regexp_replace(distritos_sp, r'^0+', '') as int64) as string
- ) as distritos_sp,
- cast(
- cast(regexp_replace(bairros_fortaleza, r'^0+', '') as int64) as string
+ trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string)) as bairros_sp,
+ trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp,
+ trim(
+ safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string)
) as bairros_fortaleza,
- cast(cast(regexp_replace(bairros_rj, r'^0+', '') as int64) as string) as bairros_rj,
- cast(
- cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as int64) as string
- ) as regioes_administrativas_df,
+ trim(
+ nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '')
+ ) as bairros_rj,
+ trim(
+ safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string)
+ ) as regioes_administrativas_df
from `basedosdados-staging.br_me_rais_staging.microdados_vinculos`
+{% if is_incremental() %}
+ where
+ safe_cast(ano as int64) > (select safe_cast(max(ano) as int64) from {{ this }})
+{% endif %}
diff --git a/models/br_me_rais/code/rais_estabelecimento.ipynb b/models/br_me_rais/code/rais_estabelecimento.ipynb
new file mode 100755
index 00000000..100937a3
--- /dev/null
+++ b/models/br_me_rais/code/rais_estabelecimento.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import os\n",
+ "import basedosdados as bd\n",
+ "import numpy as np\n",
+ "from datetime import datetime\n",
+ "from os.path import join\n",
+ "from pathlib import Path\n",
+ "from typing import Any, Dict, List, Optional, Tuple, Union\n",
+ "pd.set_option(\"display.max_columns\", None)\n",
+ "\n",
+ "def to_partitions(\n",
+ " data: pd.DataFrame,\n",
+ " partition_columns: List[str],\n",
+ " savepath: str,\n",
+ " file_type: str = \"csv\",\n",
+ "):\n",
+ " \"\"\"Save data into hive partitions schema, given a dataframe and a list of partition columns.\n",
+ " Args:\n",
+ " data (pandas.core.frame.DataFrame): Dataframe to be partitioned.\n",
+ " partition_columns (list): List of columns to be used as partitions.\n",
+ " savepath (str, pathlib.PosixPath): folder path to save the partitions.\n",
+ " file_type (str): default to csv. Accepts parquet.\n",
+ " Example:\n",
+ " data = {\n",
+ " \"ano\": [2020, 2021, 2020, 2021, 2020, 2021, 2021,2025],\n",
+ " \"mes\": [1, 2, 3, 4, 5, 6, 6,9],\n",
+ " \"sigla_uf\": [\"SP\", \"SP\", \"RJ\", \"RJ\", \"PR\", \"PR\", \"PR\",\"PR\"],\n",
+ " \"dado\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\",'h'],\n",
+ " }\n",
+ " to_partitions(\n",
+ " data=pd.DataFrame(data),\n",
+ " partition_columns=['ano','mes','sigla_uf'],\n",
+ " savepath='partitions/',\n",
+ " )\n",
+ " \"\"\"\n",
+ "\n",
+ " if isinstance(data, (pd.core.frame.DataFrame)):\n",
+ " savepath = Path(savepath)\n",
+ " # create unique combinations between partition columns\n",
+ " unique_combinations = (\n",
+ " data[partition_columns]\n",
+ " # .astype(str)\n",
+ " .drop_duplicates(subset=partition_columns).to_dict(orient=\"records\")\n",
+ " )\n",
+ "\n",
+ " for filter_combination in unique_combinations:\n",
+ " patitions_values = [\n",
+ " f\"{partition}={value}\"\n",
+ " for partition, value in filter_combination.items()\n",
+ " ]\n",
+ "\n",
+ " # get filtered data\n",
+ " df_filter = data.loc[\n",
+ " data[filter_combination.keys()]\n",
+ " .isin(filter_combination.values())\n",
+ " .all(axis=1),\n",
+ " :,\n",
+ " ]\n",
+ " df_filter = df_filter.drop(columns=partition_columns)\n",
+ "\n",
+ " # create folder tree\n",
+ " filter_save_path = Path(savepath / \"/\".join(patitions_values))\n",
+ " filter_save_path.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ " if file_type == \"csv\":\n",
+ " # append data to csv\n",
+ " file_filter_save_path = Path(filter_save_path) / \"data.csv\"\n",
+ " df_filter.to_csv(\n",
+ " file_filter_save_path,\n",
+ " sep=\",\",\n",
+ " encoding=\"utf-8\",\n",
+ " na_rep=\"\",\n",
+ " index=False,\n",
+ " mode=\"a\",\n",
+ " header=not file_filter_save_path.exists(),\n",
+ " )\n",
+ " elif file_type == \"parquet\":\n",
+ " # append data to parquet\n",
+ " file_filter_save_path = Path(filter_save_path) / \"data.parquet\"\n",
+ " df_filter.to_parquet(\n",
+ " file_filter_save_path, index=False, compression=\"gzip\"\n",
+ " )\n",
+ " else:\n",
+ " raise BaseException(\"Data need to be a pandas DataFrame\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Downloading: 100%|██████████| 5570/5570 [00:00<00:00, 6164.87rows/s]\n",
+ "Downloading: 100%|██████████| 27/27 [00:00<00:00, 86.26rows/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "natureza\n",
+ "subatividade_ibge\n"
+ ]
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\n",
+ " \"X:\\\\dados\\\\br_me_rais\\\\estabelecimento_2023\\\\RAIS_ESTAB_PUB.txt\",\n",
+ " encoding=\"latin1\",\n",
+ " sep=\";\",\n",
+ " dtype=str,\n",
+ ")\n",
+ "\n",
+ "df.rename(columns={\n",
+ " 'Bairros SP' : 'bairros_sp',\n",
+ " 'Bairros Fortaleza' : 'bairros_fortaleza',\n",
+ " 'Bairros RJ' : 'bairros_rj',\n",
+ " 'CNAE 2.0 Classe' : 'cnae_2',\n",
+ " 'CNAE 95 Classe' : 'cnae_1',\n",
+ " 'Distritos SP' : 'distritos_sp',\n",
+ " 'Qtd Vínculos CLT' : 'quantidade_vinculos_clt',\n",
+ " 'Qtd Vínculos Ativos' : 'quantidade_vinculos_ativos',\n",
+ " 'Qtd Vínculos Estatutários' : 'quantidade_vinculos_estatutarios',\n",
+ " 'Ind Atividade Ano' : 'indicador_atividade_ano',\n",
+ " 'Ind CEI Vinculado' : 'indicador_cei_vinculado',\n",
+ " 'Ind Estab Participa PAT' : 'indicador_pat',\n",
+ " 'Ind Rais Negativa' : 'indicador_rais_negativa',\n",
+ " 'Ind Simples' : 'indicador_simples',\n",
+ " 'Município' : 'municipio',\n",
+ " 'Natureza Jurídica' : 'natureza_juridica',\n",
+ " 'Regiões Adm DF' : 'regioes_administrativas_df',\n",
+ " 'CNAE 2.0 Subclasse' : 'cnae_2_subclasse',\n",
+ " 'Tamanho Estabelecimento' : 'tamanho',\n",
+ " 'Tipo Estab' : 'tipo',\n",
+ " 'UF' : 'uf',\n",
+ " 'IBGE Subsetor' : 'subsetor_ibge',\n",
+ " 'CEP Estab' : 'cep',\n",
+ " }, inplace=True)\n",
+ "\n",
+ "df['ano'] = 2023\n",
+ "\n",
+ "df['municipio'] = df['municipio'].astype(str)\n",
+ "\n",
+ "# Carregar os arquivos\n",
+ "\n",
+ "df_municipio = bd.read_sql('SELECT id_municipio, id_municipio_6 FROM `basedosdados.br_bd_diretorios_brasil.municipio`', billing_project_id='basedosdados', reauth=False)\n",
+ "df_uf = bd.read_sql('SELECT id_uf, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`', billing_project_id='basedosdados', reauth=False)\n",
+ "\n",
+ "# Mescla com o arquivo de municípios\n",
+ "df = pd.merge(df, df_municipio, left_on='municipio', right_on='id_municipio_6', how='left')\n",
+ "df.drop(['id_municipio_6', 'municipio'], axis=1, inplace=True)\n",
+ "\n",
+ "# Gerar a sigla_uf\n",
+ "\n",
+ "# Mescla com o arquivo de UFs\n",
+ "df['uf'] = df['uf'].astype(str)\n",
+ "df = pd.merge(df, df_uf, left_on='uf', right_on='id_uf', how='left')\n",
+ "df = df.drop(['id_uf', 'uf'], axis=1)\n",
+ "df = df.rename(columns={'sigla': 'sigla_uf'})\n",
+ "\n",
+ "\n",
+ "# Substitui sigla_uf vazia por \"IGNORADO\"\n",
+ "df['sigla_uf'].replace(np.nan, \"IGNORADO\", inplace=True)\n",
+ "\n",
+ "# Padronização das variáveis e dados\n",
+ "for col in df.columns:\n",
+ " if df[col].dtype == 'str':\n",
+ " df[col] = df[col].str.strip()\n",
+ " df[col].replace([\"{ñ\", \"{ñ class}\", \"{ñ c\"], \"\", inplace=True)\n",
+ "\n",
+ "# Lista de variáveis\n",
+ "vars_list = [\n",
+ " 'ano',\n",
+ " 'sigla_uf',\n",
+ " 'id_municipio',\n",
+ " 'quantidade_vinculos_ativos',\n",
+ " 'quantidade_vinculos_clt',\n",
+ " 'quantidade_vinculos_estatutarios',\n",
+ " 'natureza',\n",
+ " 'natureza_juridica',\n",
+ " 'tamanho',\n",
+ " 'tipo',\n",
+ " 'indicador_cei_vinculado',\n",
+ " 'indicador_pat',\n",
+ " 'indicador_simples',\n",
+ " 'indicador_rais_negativa',\n",
+ " 'indicador_atividade_ano',\n",
+ " 'cnae_1',\n",
+ " 'cnae_2',\n",
+ " 'cnae_2_subclasse',\n",
+ " 'subsetor_ibge',\n",
+ " 'subatividade_ibge',\n",
+ " 'cep',\n",
+ " 'bairros_sp',\n",
+ " 'distritos_sp',\n",
+ " 'bairros_fortaleza',\n",
+ " 'bairros_rj',\n",
+ " 'regioes_administrativas_df'\n",
+ "]\n",
+ "\n",
+ "# Gera as variáveis não confirmadas\n",
+ "for var in vars_list:\n",
+ " if var not in df.columns:\n",
+ " print(var)\n",
+ " df[var] = \"\"\n",
+ "\n",
+ "# Limpeza adicional de variáveis\n",
+ "for col in df.columns:\n",
+ " if df[col].dtype == 'str':\n",
+ " print(col)\n",
+ " df[col] = df[col].str.strip()\n",
+ " df[col].replace([\"{ñ\", \"{ñ class}\", \"{ñ c\", \"{ñ clas}\"], \"\", inplace=True)\n",
+ "\n",
+ "# Limpeza para variáveis específicas\n",
+ "for col in ['bairros_sp', 'distritos_sp', 'bairros_fortaleza', 'bairros_rj', 'distritos_sp', 'regioes_administrativas_df', 'cnae_2', 'cnae_2_subclasse', 'subsetor_ibge', 'subatividade_ibge']:\n",
+ " df[col].replace([\"0000\", \"00000\", \"000000\", \"0000000\", \"0000-1\", \"000-1\", \"998\", \"999\", \"9999\", \"9997\", \"00\", \"-1\"], \"\", inplace=True)\n",
+ "\n",
+ "# Limpeza de natureza_juridica e cep\n",
+ "df['natureza_juridica'].replace([\"9990\", \"9999\"], \"\", inplace=True)\n",
+ "df['cep'].replace(\"0\", \"\", inplace=True)\n",
+ "\n",
+ "# Ajuste na variável tipo\n",
+ "df['tipo'].replace([\"CNPJ\", \"Cnpj\", \"01\", \"1\"], \"1\", inplace=True)\n",
+ "df['tipo'].replace([\"CAEPF\", \"Caepf\"], \"2\", inplace=True)\n",
+ "df['tipo'].replace([\"CEI\", \"Cei\", \"CEI/CNO\", \"Cei/Cno\", \"CNO\", \"Cno\", \"03\", \"3\"], \"3\", inplace=True)\n",
+ "\n",
+ "# Converte colunas para numérico\n",
+ "cols_to_numeric = ['id_municipio', 'quantidade_vinculos_ativos', 'quantidade_vinculos_clt', 'quantidade_vinculos_estatutarios', 'tamanho', 'indicador_cei_vinculado', 'indicador_pat', 'indicador_simples', 'indicador_rais_negativa', 'indicador_atividade_ano']\n",
+ "df[cols_to_numeric] = df[cols_to_numeric].apply(pd.to_numeric, errors='coerce')\n",
+ "\n",
+ "# Reordena as colunas\n",
+ "df = df[vars_list]\n",
+ "\n",
+ "to_partitions(\n",
+ " data=df,\n",
+ " partition_columns=[\"ano\", \"sigla_uf\"],\n",
+ " savepath=\"X:\\\\dados\\\\br_me_rais\\\\estabelecimento_2023\\\\estabelecimento\",\n",
+ " file_type=\"csv\",\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_me_rais/code/rais_vinculo.ipynb b/models/br_me_rais/code/rais_vinculo.ipynb
new file mode 100755
index 00000000..4ff17521
--- /dev/null
+++ b/models/br_me_rais/code/rais_vinculo.ipynb
@@ -0,0 +1,452 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datetime import datetime\n",
+ "from os.path import join\n",
+ "from pathlib import Path\n",
+ "from typing import Any, Dict, List, Optional, Tuple, Union\n",
+ "import basedosdados as bd # type: ignore\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import gc\n",
+ "import tqdm\n",
+ "\n",
+ "\n",
+ "def to_partitions(\n",
+ " data: pd.DataFrame,\n",
+ " partition_columns: List[str],\n",
+ " savepath: str,\n",
+ " file_type: str = \"csv\",\n",
+ "):\n",
+ " \"\"\"Save data into hive partitions schema, given a dataframe and a list of partition columns.\n",
+ " Args:\n",
+ " data (pandas.core.frame.DataFrame): Dataframe to be partitioned.\n",
+ " partition_columns (list): List of columns to be used as partitions.\n",
+ " savepath (str, pathlib.PosixPath): folder path to save the partitions.\n",
+ " file_type (str): default to csv. Accepts parquet.\n",
+ " Example:\n",
+ " data = {\n",
+ " \"ano\": [2020, 2021, 2020, 2021, 2020, 2021, 2021,2025],\n",
+ " \"mes\": [1, 2, 3, 4, 5, 6, 6,9],\n",
+ " \"sigla_uf\": [\"SP\", \"SP\", \"RJ\", \"RJ\", \"PR\", \"PR\", \"PR\",\"PR\"],\n",
+ " \"dado\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\",'h'],\n",
+ " }\n",
+ " to_partitions(\n",
+ " data=pd.DataFrame(data),\n",
+ " partition_columns=['ano','mes','sigla_uf'],\n",
+ " savepath='partitions/',\n",
+ " )\n",
+ " \"\"\"\n",
+ "\n",
+ " if isinstance(data, (pd.core.frame.DataFrame)):\n",
+ " savepath = Path(savepath)\n",
+ " # create unique combinations between partition columns\n",
+ " unique_combinations = (\n",
+ " data[partition_columns]\n",
+ " # .astype(str)\n",
+ " .drop_duplicates(subset=partition_columns).to_dict(orient=\"records\")\n",
+ " )\n",
+ "\n",
+ " for filter_combination in unique_combinations:\n",
+ " patitions_values = [\n",
+ " f\"{partition}={value}\"\n",
+ " for partition, value in filter_combination.items()\n",
+ " ]\n",
+ "\n",
+ " # get filtered data\n",
+ " df_filter = data.loc[\n",
+ " data[filter_combination.keys()]\n",
+ " .isin(filter_combination.values())\n",
+ " .all(axis=1),\n",
+ " :,\n",
+ " ]\n",
+ " df_filter = df_filter.drop(columns=partition_columns)\n",
+ "\n",
+ " # create folder tree\n",
+ " filter_save_path = Path(savepath / \"/\".join(patitions_values))\n",
+ " filter_save_path.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ " if file_type == \"csv\":\n",
+ " # append data to csv\n",
+ " file_filter_save_path = Path(filter_save_path) / \"data.csv\"\n",
+ " df_filter.to_csv(\n",
+ " file_filter_save_path,\n",
+ " sep=\",\",\n",
+ " encoding=\"utf-8\",\n",
+ " na_rep=\"\",\n",
+ " index=False,\n",
+ " mode=\"a\",\n",
+ " header=not file_filter_save_path.exists(),\n",
+ " )\n",
+ " elif file_type == \"parquet\":\n",
+ " # append data to parquet\n",
+ " file_filter_save_path = Path(filter_save_path) / \"data.parquet\"\n",
+ " df_filter.to_parquet(\n",
+ " file_filter_save_path, index=False, compression=\"gzip\"\n",
+ " )\n",
+ " else:\n",
+ " raise BaseException(\"Data need to be a pandas DataFrame\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.set_option(\"display.max_columns\", None)\n",
+ "\n",
+ "chucks = []\n",
+ "\n",
+ "\n",
+ "df_municipio = bd.read_sql(\n",
+ " \"SELECT id_municipio, id_municipio_6, sigla_uf FROM `basedosdados.br_bd_diretorios_brasil.municipio`\",\n",
+ " billing_project_id=\"basedosdados\",\n",
+ " reauth=False,\n",
+ ")\n",
+ "\n",
+ "valor = 0\n",
+ "\n",
+ "for chunk in tqdm.tqdm(\n",
+ " pd.read_csv(\n",
+ " \"X:\\\\dados\\\\br_me_rais\\\\vinculos_2023\\\\RAIS_VINC_PUB_SUL.txt\",\n",
+ " sep=\";\",\n",
+ " encoding=\"latin1\",\n",
+ " low_memory=False,\n",
+ " decimal=\",\",\n",
+ " chunksize=100000,\n",
+ " )\n",
+ "):\n",
+ " valor = valor + 1\n",
+ " print(f\"Quantidade: {valor}\")\n",
+ "\n",
+ " ints = chunk.select_dtypes(include=[\"int64\", \"int32\", \"int16\"]).columns\n",
+ " chunk[ints] = chunk[ints].apply(pd.to_numeric, downcast=\"integer\")\n",
+ "\n",
+ " floats = chunk.select_dtypes(include=[\"float\"]).columns\n",
+ " chunk[floats] = chunk[floats].apply(pd.to_numeric, downcast=\"float\")\n",
+ "\n",
+ " objects = chunk.select_dtypes(\"object\").columns\n",
+ " chunk[objects] = chunk[objects].apply(lambda x: x.astype(\"category\"))\n",
+ "\n",
+ " chunk.rename(\n",
+ " columns={\n",
+ " \"Tipo Vínculo\": \"tipo_vinculo\",\n",
+ " \"Vínculo Ativo 31/12\": \"vinculo_ativo_3112\",\n",
+ " \"Tipo Admissão\": \"tipo_admissao\",\n",
+ " \"Mês Admissão\": \"mes_admissao\",\n",
+ " \"Mês Desligamento\": \"mes_desligamento\",\n",
+ " \"Motivo Desligamento\": \"motivo_desligamento\",\n",
+ " \"Causa Afastamento 1\": \"causa_desligamento_1\",\n",
+ " \"Causa Afastamento 2\": \"causa_desligamento_2\",\n",
+ " \"Causa Afastamento 3\": \"causa_desligamento_3\",\n",
+ " \"Faixa Tempo Emprego\": \"faixa_tempo_emprego\",\n",
+ " \"Tempo Emprego\": \"tempo_emprego\",\n",
+ " \"Faixa Hora Contrat\": \"faixa_horas_contratadas\",\n",
+ " \"Qtd Hora Contr\": \"quantidade_horas_contratadas\",\n",
+ " \"Mun Trab\": \"id_municipio_trabalho\",\n",
+ " \"Qtd Dias Afastamento\": \"quantidade_dias_afastamento\",\n",
+ " \"Ind CEI Vinculado\": \"indicador_cei_vinculado\",\n",
+ " \"Ind Trab Parcial\": \"indicador_trabalho_parcial\",\n",
+ " \"Ind Trab Intermitente\": \"indicador_trabalho_intermitente\",\n",
+ " \"Faixa Remun Média (SM)\": \"faixa_remuneracao_media_sm\",\n",
+ " \"Vl Remun Média (SM)\": \"valor_remuneracao_media_sm\",\n",
+ " \"Vl Remun Média Nom\": \"valor_remuneracao_media\",\n",
+ " \"Faixa Remun Dezem (SM)\": \"faixa_remuneracao_dezembro_sm\",\n",
+ " \"Vl Remun Dezembro (SM)\": \"valor_remuneracao_dezembro_sm\",\n",
+ " \"Vl Rem Janeiro SC\": \"valor_remuneracao_janeiro\",\n",
+ " \"Vl Rem Fevereiro SC\": \"valor_remuneracao_fevereiro\",\n",
+ " \"Vl Rem Março SC\": \"valor_remuneracao_marco\",\n",
+ " \"Vl Rem Abril SC\": \"valor_remuneracao_abril\",\n",
+ " \"Vl Rem Maio SC\": \"valor_remuneracao_maio\",\n",
+ " \"Vl Rem Junho SC\": \"valor_remuneracao_junho\",\n",
+ " \"Vl Rem Julho SC\": \"valor_remuneracao_julho\",\n",
+ " \"Vl Rem Agosto SC\": \"valor_remuneracao_agosto\",\n",
+ " \"Vl Rem Setembro SC\": \"valor_remuneracao_setembro\",\n",
+ " \"Vl Rem Outubro SC\": \"valor_remuneracao_outubro\",\n",
+ " \"Vl Rem Novembro SC\": \"valor_remuneracao_novembro\",\n",
+ " \"Vl Remun Dezembro Nom\": \"valor_remuneracao_dezembro\",\n",
+ " \"CBO Ocupação 2002\": \"cbo_2002\",\n",
+ " \"Faixa Etária\": \"faixa_etaria\",\n",
+ " \"Idade\": \"idade\",\n",
+ " \"Escolaridade após 2005\": \"grau_instrucao_apos_2005\",\n",
+ " \"Nacionalidade\": \"nacionalidade\",\n",
+ " \"Sexo Trabalhador\": \"sexo\",\n",
+ " \"Raça Cor\": \"raca_cor\",\n",
+ " \"Ind Portador Defic\": \"indicador_portador_deficiencia\",\n",
+ " \"Tipo Defic\": \"tipo_deficiencia\",\n",
+ " \"Ano Chegada Brasil\": \"ano_chegada_brasil\",\n",
+ " \"IBGE Subsetor\": \"subsetor_ibge\",\n",
+ " \"CNAE 95 Classe\": \"cnae_1\",\n",
+ " \"CNAE 2.0 Classe\": \"cnae_2\",\n",
+ " \"CNAE 2.0 Subclasse\": \"cnae_2_subclasse\",\n",
+ " \"Tamanho Estabelecimento\": \"tamanho_estabelecimento\",\n",
+ " \"Tipo Estab\": \"tipo_estabelecimento\",\n",
+ " \"Natureza Jurídica\": \"natureza_juridica\",\n",
+ " \"Ind Simples\": \"indicador_simples\",\n",
+ " \"Bairros SP\": \"bairros_sp\",\n",
+ " \"Distritos SP\": \"distritos_sp\",\n",
+ " \"Bairros Fortaleza\": \"bairros_fortaleza\",\n",
+ " \"Bairros RJ\": \"bairros_rj\",\n",
+ " \"Regiões Adm DF\": \"regioes_administrativas_df\",\n",
+ " \"Município\": \"municipio\",\n",
+ " },\n",
+ " inplace=True,\n",
+ " )\n",
+ "\n",
+ " chunk[\"ano\"] = 2023\n",
+ "\n",
+ " chunk[[\"municipio\", \"id_municipio_trabalho\"]] = chunk[\n",
+ " [\"municipio\", \"id_municipio_trabalho\"]\n",
+ " ].astype(str)\n",
+ "\n",
+ " # Mescla com o arquivo de municípios\n",
+ "\n",
+ " chunk = pd.merge(\n",
+ " chunk,\n",
+ " df_municipio,\n",
+ " left_on=[\"municipio\"],\n",
+ " right_on=[\"id_municipio_6\"],\n",
+ " how=\"left\",\n",
+ " )\n",
+ "\n",
+ " chunk = pd.merge(\n",
+ " chunk,\n",
+ " df_municipio,\n",
+ " left_on=[\"id_municipio_trabalho\"],\n",
+ " right_on=[\"id_municipio_6\"],\n",
+ " how=\"left\",\n",
+ " )\n",
+ "\n",
+ " chunk.drop(\n",
+ " [\n",
+ " \"id_municipio_trabalho\",\n",
+ " \"municipio\",\n",
+ " \"id_municipio_6_x\",\n",
+ " \"id_municipio_6_y\",\n",
+ " \"sigla_uf_y\",\n",
+ " ],\n",
+ " axis=1,\n",
+ " inplace=True,\n",
+ " )\n",
+ "\n",
+ " chunk.rename(\n",
+ " columns={\n",
+ " \"id_municipio_x\": \"id_municipio\",\n",
+ " \"sigla_uf_x\": \"sigla_uf\",\n",
+ " \"id_municipio_y\": \"id_municipio_trabalho\",\n",
+ " },\n",
+ " inplace=True,\n",
+ " )\n",
+ "\n",
+ " chunk[\"sigla_uf\"].replace([np.nan, \"NI\"], \"IGNORADO\", inplace=True)\n",
+ "\n",
+ " vars_list = [\n",
+ " \"ano\",\n",
+ " \"sigla_uf\",\n",
+ " \"id_municipio\",\n",
+ " \"tipo_vinculo\",\n",
+ " \"vinculo_ativo_3112\",\n",
+ " \"tipo_admissao\",\n",
+ " \"mes_admissao\",\n",
+ " \"mes_desligamento\",\n",
+ " \"motivo_desligamento\",\n",
+ " \"causa_desligamento_1\",\n",
+ " \"causa_desligamento_2\",\n",
+ " \"causa_desligamento_3\",\n",
+ " \"faixa_tempo_emprego\",\n",
+ " \"tempo_emprego\",\n",
+ " \"faixa_horas_contratadas\",\n",
+ " \"quantidade_horas_contratadas\",\n",
+ " \"id_municipio_trabalho\",\n",
+ " \"quantidade_dias_afastamento\",\n",
+ " \"indicador_cei_vinculado\",\n",
+ " \"indicador_trabalho_parcial\",\n",
+ " \"indicador_trabalho_intermitente\",\n",
+ " \"faixa_remuneracao_media_sm\",\n",
+ " \"valor_remuneracao_media_sm\",\n",
+ " \"valor_remuneracao_media\",\n",
+ " \"faixa_remuneracao_dezembro_sm\",\n",
+ " \"valor_remuneracao_dezembro_sm\",\n",
+ " \"valor_remuneracao_janeiro\",\n",
+ " \"valor_remuneracao_fevereiro\",\n",
+ " \"valor_remuneracao_marco\",\n",
+ " \"valor_remuneracao_abril\",\n",
+ " \"valor_remuneracao_maio\",\n",
+ " \"valor_remuneracao_junho\",\n",
+ " \"valor_remuneracao_julho\",\n",
+ " \"valor_remuneracao_agosto\",\n",
+ " \"valor_remuneracao_setembro\",\n",
+ " \"valor_remuneracao_outubro\",\n",
+ " \"valor_remuneracao_novembro\",\n",
+ " \"valor_remuneracao_dezembro\",\n",
+ " \"tipo_salario\",\n",
+ " \"valor_salario_contratual\",\n",
+ " \"subatividade_ibge\",\n",
+ " \"subsetor_ibge\",\n",
+ " \"cbo_1994\",\n",
+ " \"cbo_2002\",\n",
+ " \"cnae_1\",\n",
+ " \"cnae_2\",\n",
+ " \"cnae_2_subclasse\",\n",
+ " \"faixa_etaria\",\n",
+ " \"idade\",\n",
+ " \"grau_instrucao_1985_2005\",\n",
+ " \"grau_instrucao_apos_2005\",\n",
+ " \"nacionalidade\",\n",
+ " \"sexo\",\n",
+ " \"raca_cor\",\n",
+ " \"indicador_portador_deficiencia\",\n",
+ " \"tipo_deficiencia\",\n",
+ " \"ano_chegada_brasil\",\n",
+ " \"tamanho_estabelecimento\",\n",
+ " \"tipo_estabelecimento\",\n",
+ " \"natureza_juridica\",\n",
+ " \"indicador_simples\",\n",
+ " \"bairros_sp\",\n",
+ " \"distritos_sp\",\n",
+ " \"bairros_fortaleza\",\n",
+ " \"bairros_rj\",\n",
+ " \"regioes_administrativas_df\",\n",
+ " ]\n",
+ "\n",
+ " for var in vars_list:\n",
+ "\n",
+ " if var not in chunk.columns:\n",
+ " chunk[var] = \"\"\n",
+ "\n",
+ " # Limpeza de variáveis\n",
+ " chunk = chunk.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n",
+ "\n",
+ " # Limpeza de códigos inválidos\n",
+ "\n",
+ " invalid_codes_bairros = [\n",
+ " \"0000\",\n",
+ " \"00000\",\n",
+ " \"000000\",\n",
+ " \"0000000\",\n",
+ " \"0000-1\",\n",
+ " \"000-1\",\n",
+ " \"9999\",\n",
+ " \"9997\",\n",
+ " ]\n",
+ " for col in [\n",
+ " \"bairros_rj\",\n",
+ " \"bairros_sp\",\n",
+ " \"bairros_fortaleza\",\n",
+ " \"distritos_sp\",\n",
+ " \"regioes_administrativas_df\",\n",
+ " ]:\n",
+ "\n",
+ " chunk[col].replace(invalid_codes_bairros, \"\", inplace=True)\n",
+ "\n",
+ " # Mais substituições de códigos\n",
+ "\n",
+ " invalid_codes_general = [\"0000\", \"00000\", \"000000\", \"0000000\", \"0000-1\", \"000-1\"]\n",
+ "\n",
+ " for col in [\n",
+ " \"cbo_1994\",\n",
+ " \"cbo_2002\",\n",
+ " \"cnae_1\",\n",
+ " \"cnae_2\",\n",
+ " \"cnae_2_subclasse\",\n",
+ " \"ano_chegada_brasil\",\n",
+ " ]:\n",
+ "\n",
+ " chunk[col].replace(invalid_codes_general, \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"mes_admissao\"].replace(\"00\", \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"mes_desligamento\"].replace(\"00\", \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"motivo_desligamento\"].replace(\"0\", \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"causa_desligamento_1\"].replace(\"99\", \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"raca_cor\"].replace(\"99\", \"9\", inplace=True)\n",
+ "\n",
+ " # Ajustes adicionais\n",
+ "\n",
+ " chunk[\"natureza_juridica\"].replace([\"9990\", \"9999\"], \"\", inplace=True)\n",
+ "\n",
+ " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\n",
+ " [\"CNPJ\", \"Cnpj\", \"01\", \"1\"], \"1\"\n",
+ " )\n",
+ "\n",
+ " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\"CAEPF\", \"2\")\n",
+ "\n",
+ " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\n",
+ " [\"CEI\", \"Cei\", \"CEI/CNO\", \"Cei/Cno\", \"CNO\", \"Cno\", \"03\", \"3\"], \"3\"\n",
+ " )\n",
+ "\n",
+ " # Conversão de valores monetários\n",
+ "\n",
+ " monetary_vars = [\n",
+ " \"valor_remuneracao_janeiro\",\n",
+ " \"valor_remuneracao_fevereiro\",\n",
+ " \"valor_remuneracao_marco\",\n",
+ " \"valor_remuneracao_abril\",\n",
+ " \"valor_remuneracao_maio\",\n",
+ " \"valor_remuneracao_junho\",\n",
+ " \"valor_remuneracao_julho\",\n",
+ " \"valor_remuneracao_agosto\",\n",
+ " \"valor_remuneracao_setembro\",\n",
+ " \"valor_remuneracao_outubro\",\n",
+ " \"valor_remuneracao_novembro\",\n",
+ " \"valor_remuneracao_dezembro\",\n",
+ " \"valor_salario_contratual\",\n",
+ " \"valor_remuneracao_dezembro_sm\",\n",
+ " \"valor_remuneracao_media\",\n",
+ " \"valor_remuneracao_media_sm\",\n",
+ " ]\n",
+ "\n",
+ " for var in monetary_vars:\n",
+ "\n",
+ " chunk[var] = chunk[var].astype(str)\n",
+ "\n",
+ " chunk[var] = chunk[var].str.replace(\",\", \".\")\n",
+ "\n",
+ " chunk = chunk[vars_list]\n",
+ "\n",
+ " print(\"Particionando...\")\n",
+ "\n",
+ " to_partitions(\n",
+ " data=chunk,\n",
+ " partition_columns=[\"ano\", \"sigla_uf\"],\n",
+ " savepath=\"X:\\\\dados\\\\br_me_rais\\\\vinculos_2023\\\\vinculos\",\n",
+ " file_type=\"csv\",\n",
+ " )\n",
+ "\n",
+ " del chunk\n",
+ "\n",
+ " gc.collect()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/models/br_me_rais/schema.yml b/models/br_me_rais/schema.yml
index 863e0cca..b58f2f05 100644
--- a/models/br_me_rais/schema.yml
+++ b/models/br_me_rais/schema.yml
@@ -135,39 +135,39 @@ models:
description: Indicador Trabalho Parcial
- name: indicador_trabalho_intermitente
description: Indicador Trabalho Intermitente
- - name: faixa_remun_media_sm
+ - name: faixa_remuneracao_media_sm
description: Faixa Remuneração Média (Salários Mínimos)
- - name: valor_remun_media_sm
+ - name: valor_remuneracao_media_sm
description: Valor da Remuneração Média (Salários Mínimos)
- - name: valor_remun_media_nominal
+ - name: valor_remuneracao_media
description: Valor da Remuneração Média (Nominal)
- - name: faixa_remun_dezembro_sm
+ - name: faixa_remuneracao_dezembro_sm
description: Faixa Remuneração em Dezembro (Salários Mínimos)
- - name: valor_remun_dezembro_sm
+ - name: valor_remuneracao_dezembro_sm
description: Valor da Remuneração em Dezembro (Salários Mínimos)
- - name: valor_remun_janeiro_nominal
+ - name: valor_remuneracao_janeiro
description: Valor da Remuneração em Janeiro (Nominal)
- - name: valor_remun_fevereiro_nominal
+ - name: valor_remuneracao_fevereiro
description: Valor da Remuneração em Fevereiro (Nominal)
- - name: valor_remun_marco_nominal
+ - name: valor_remuneracao_marco
description: Valor da Remuneração em Março (Nominal)
- - name: valor_remun_abril_nominal
+ - name: valor_remuneracao_abril
description: Valor da Remuneração em Abril (Nominal)
- - name: valor_remun_maio_nominal
+ - name: valor_remuneracao_maio
description: Valor da Remuneração em Maio (Nominal)
- - name: valor_remun_junho_nominal
+ - name: valor_remuneracao_junho
description: Valor da Remuneração em Junho (Nominal)
- - name: valor_remun_julho_nominal
+ - name: valor_remuneracao_julho
description: Valor da Remuneração em Julho (Nominal)
- - name: valor_remun_agosto_nominal
+ - name: valor_remuneracao_agosto
description: Valor da Remuneração em Agosto (Nominal)
- - name: valor_remun_setembro_nominal
+ - name: valor_remuneracao_setembro
description: Valor da Remuneração em Setembro (Nominal)
- - name: valor_remun_outubro_nominal
+ - name: valor_remuneracao_outubro
description: Valor da Remuneração em Outubro (Nominal)
- - name: valor_remun_novembro_nominal
+ - name: valor_remuneracao_novembro
description: Valor da Remuneração em Novembro (Nominal)
- - name: valor_remun_dezembro_nominal
+ - name: valor_remuneracao_dezembro
description: Valor da Remuneração em Dezembro (Nominal)
- name: tipo_salario
description: Tipo do Salário
@@ -220,7 +220,7 @@ models:
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__subatividade_ibge')
- field: subatividade_ibge.subatividade_ibge
+ field: id_subatividade
- name: subsetor_ibge
description: Subsetor - IBGE
- name: cnae_1
@@ -235,11 +235,9 @@ models:
- name: cnae_2_subclasse
description: Classificação Nacional de Atividades Econômicas (CNAE) 2.0 Subclasse
tests:
- - custom_relationships:
- tags: [cnae]
- to: ref('br_bd_diretorios_brasil__cnae_2')
- field: subclasse
- ignore_values: ['9999997', '8630505', '3312101', '9999999', 00000-1]
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__cnae_2_subclasse')
+ field: cnae_2_subclasse.cnae_2_subclasse
- name: tamanho_estabelecimento
description: Tamanho do Estabelecimento
- name: tipo_estabelecimento
@@ -293,7 +291,7 @@ models:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- - name: quantidade_vinculos_ativo
+ - name: quantidade_vinculos_ativos
description: Estoque de vínculos ativos em 31/12.
- name: quantidade_vinculos_clt
description: Estoque de vínculos, sob o regime CLT e Outros, ativos em 31/12
@@ -377,7 +375,7 @@ models:
- name: subatividade_ibge
description: Subatividade IBGE
tests:
- - custom_relationships:
+ - relationships:
to: ref('br_bd_diretorios_brasil__subatividade_ibge')
field: id_subatividade
- name: cep
diff --git a/models/br_ms_cnes/br_ms_cnes__dicionario.sql b/models/br_ms_cnes/br_ms_cnes__dicionario.sql
index 8b1159e6..b6b6c255 100644
--- a/models/br_ms_cnes/br_ms_cnes__dicionario.sql
+++ b/models/br_ms_cnes/br_ms_cnes__dicionario.sql
@@ -5,6 +5,6 @@ select
safe_cast(id_tabela as string) id_tabela,
safe_cast(nome_coluna as string) nome_coluna,
safe_cast(chave as string) chave,
- safe_cast(replace(cobertura_temporal, '-1', '(1)') as string) cobertura_temporal,
+ safe_cast(cobertura_temporal as string) cobertura_temporal,
safe_cast(valor as string) valor,
from `basedosdados-staging.br_ms_cnes_staging.dicionario` as t
diff --git a/models/br_ms_sim/br_ms_sim__dicionario.sql b/models/br_ms_sim/br_ms_sim__dicionario.sql
index cd5d5419..41d561d9 100644
--- a/models/br_ms_sim/br_ms_sim__dicionario.sql
+++ b/models/br_ms_sim/br_ms_sim__dicionario.sql
@@ -1,5 +1,5 @@
{{ config(alias="dicionario", schema="br_ms_sim") }}
--- Dicionário de dados do SIM
+
select
safe_cast(id_tabela as string) id_tabela,
safe_cast(coluna as string) nome_coluna,
diff --git a/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql b/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql
new file mode 100644
index 00000000..2c7ac700
--- /dev/null
+++ b/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql
@@ -0,0 +1,109 @@
+{{
+    config(
+        alias="area_imovel",
+        schema="br_sfb_sicar",
+        materialized="incremental",
+        partition_by={
+            "field": "data_atualizacao_car",
+            "data_type": "date",
+            "granularity": "day",
+        },
+        cluster_by=["sigla_uf"],
+    )
+}}
+-- Typed SICAR area_imovel rows plus an id_municipio resolved from the municipality name.
+with
+    municipios_car as (
+        select distinct
+            safe_cast(cod_estado as string) sigla_uf,
+            safe_cast(municipio as string) municipio_nome,
+        -- one row per distinct (UF, municipality name) pair present in staging
+        from `basedosdados-staging.br_sfb_sicar_staging.area_imovel` as t
+    ),
+    -- Directory lookup on NFD-normalized, lower-cased names with non-alphanumerics stripped.
+    municipios_car_diretorio as (
+        select sd.*, m.id_municipio as id_municipio
+        from municipios_car sd
+        left join
+            `basedosdados.br_bd_diretorios_brasil.municipio` as m
+            on lower(
+                regexp_replace(normalize(sd.municipio_nome, nfd), r"[^a-zA-Z0-9\s]", "")
+            )
+            = lower(regexp_replace(normalize(m.nome, nfd), r"[^a-zA-Z0-9\s]", ""))
+            and sd.sigla_uf = m.sigla_uf
+    ),
+    correcao_manual_falhas as (  -- hard-coded ids for specific (UF, name) pairs
+        select
+            sigla_uf,
+            municipio_nome,
+            case
+                when sigla_uf = 'PE' and municipio_nome = 'Iguaracy'
+                then '2606903'
+                when sigla_uf = 'RN' and municipio_nome = 'Januario Cicco'
+                then '2405306'
+                when sigla_uf = 'RN' and municipio_nome = "Olho d'Agua do Borges"
+                then '2408409'
+                when sigla_uf = 'PA' and municipio_nome = "Santa Izabel do Para"
+                then '1506500'
+                when sigla_uf = 'SP' and municipio_nome = "Florinea"
+                then '3516101'
+                when sigla_uf = 'SP' and municipio_nome = "Sao Luiz do Paraitinga"
+                then '3550001'
+                when sigla_uf = 'SP' and municipio_nome = "Biritiba Mirim"
+                then '3506607'
+                when sigla_uf = 'MT' and municipio_nome = "Santo Antonio de Leverger"
+                then '5107800'
+                when sigla_uf = 'MT' and municipio_nome = "Poxoreu"
+                then '5107008'
+                when sigla_uf = 'BA' and municipio_nome = "Muquem do Sao Francisco"
+                then '2922250'
+                when sigla_uf = 'MG' and municipio_nome = "Passa Vinte"
+                then '3147808'
+                when sigla_uf = 'SE' and municipio_nome = "Amparo do Sao Francisco"
+                then '2800100'
+                when sigla_uf = 'BA' and municipio_nome = "Santa Terezinha"
+                then '2928505'
+                when sigla_uf = 'TO' and municipio_nome = "Tabocao"
+                then '1708254'
+                when sigla_uf = 'MG' and municipio_nome = "Dona Euzebia"
+                then '3122900'
+                when sigla_uf = 'MG' and municipio_nome = "Sao Tome das Letras"
+                then '3165206'
+                when sigla_uf = 'SC' and municipio_nome = "Grao-Para"
+                then '4206108'
+                when sigla_uf = 'CE' and municipio_nome = "Itapaje"
+                then '2306306'
+                else id_municipio
+            end as id_municipio
+        from municipios_car_diretorio
+    ),
+    -- Cast staging columns to their published types and attach the resolved id_municipio.
+    final_table as (
+        select
+            safe_cast(car.data_extracao as date) data_extracao,
+            safe_cast(car.data_atualizacao_car as date) data_atualizacao_car,
+            safe_cast(car.cod_estado as string) sigla_uf,
+            safe_cast(t2.id_municipio as string) id_municipio,
+            safe_cast(car.cod_imovel as string) id_imovel,
+            safe_cast(car.mod_fiscal as string) modulos_fiscais,
+            safe_cast(car.num_area as float64) area,
+            safe_cast(car.ind_status as string) status,
+            safe_cast(car.ind_tipo as string) tipo,
+            safe_cast(car.des_condic as string) condicao,
+            safe_cast(
+                safe.st_geogfromtext(car.geometry, make_valid => true) as geography
+            ) geometria,
+        from `basedosdados-staging.br_sfb_sicar_staging.area_imovel` as car
+        left join
+            correcao_manual_falhas as t2
+            on car.sigla_uf = t2.sigla_uf
+            and car.municipio = t2.municipio_nome
+    -- NOTE(review): join uses car.sigla_uf; lookup key comes from cod_estado. Confirm equal.
+    )
+
+select *
+from final_table
+-- Incremental runs keep only extractions newer than the latest one already in the target.
+{% if is_incremental() %}
+    where data_extracao > (select max(data_extracao) from {{ this }})
+{% endif %}
diff --git a/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql b/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql
new file mode 100644
index 00000000..92a71f0d
--- /dev/null
+++ b/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql
@@ -0,0 +1,16 @@
+{{
+    config(
+        alias="dicionario",
+        schema="br_sfb_sicar",
+        materialized="table",
+    )
+}}
+
+-- Dictionary table for br_sfb_sicar: every staging column is exposed as a string.
+select
+    safe_cast(id_tabela as string) as id_tabela,
+    safe_cast(nome_coluna as string) as nome_coluna,
+    safe_cast(chave as string) as chave,
+    safe_cast(cobertura_temporal as string) as cobertura_temporal,
+    safe_cast(valor as string) as valor,
+from `basedosdados-staging.br_sfb_sicar_staging.dicionario`
diff --git a/models/br_sfb_sicar/schema.yml b/models/br_sfb_sicar/schema.yml
new file mode 100644
index 00000000..55dea679
--- /dev/null
+++ b/models/br_sfb_sicar/schema.yml
@@ -0,0 +1,69 @@
+---
+version: 2
+models:
+ - name: br_sfb_sicar__area_imovel
+ description: Esta tabela contém o polígono de demarcação da área dos imóveis com
+ cadastro ambiental rural.
+ tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns: [id_imovel]
+ - not_null_proportion_multiple_columns:
+ at_least: 0.95
+ - custom_dictionary_coverage:
+ columns_covered_by_dictionary: [status, tipo]
+ dictionary_model: ref('br_sfb_sicar__dicionario')
+ columns:
+ - name: data_atualizacao_car
+        description: Data de atualização dos dados na fonte original. A data de atualização
+ pode variar a depender da Unidade da Federação.
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
+ - name: sigla_uf
+ description: Sigla da Unidade da Federação (UF) onde se localiza o cadastro
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ - name: id_municipio
+ description: ID Município - IBGE 7 Dígitos onde se localiza o cadastro
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
+ - name: id_imovel
+        description: Código de Inscrição no Cadastro Ambiental Rural (CAR)
+ - name: modulos_fiscais
+ description: Quantidade de módulos fiscais do imóvel
+ - name: area
+ description: Área do imóvel
+ - name: status
+ description: Status do cadastro do imóvel
+ - name: tipo
+ description: Tipo do Imóvel Rural
+ - name: condicao
+ description: Condição em que o cadastro se encontra no fluxo de análise pelo
+ órgão competente
+ - name: geometria
+ description: Geometria do imóvel
+ - name: br_sfb_sicar__dicionario
+ description: Dicionário para tradução dos códigos das tabelas do conjunto br_sfb_sicar
+ tests:
+ - dbt_utils.unique_combination_of_columns:
+ combination_of_columns:
+ - id_tabela
+ - nome_coluna
+ - chave
+ - cobertura_temporal
+ columns:
+ - name: id_tabela
+ description: ID Tabela
+ - name: nome_coluna
+ description: Nome da coluna
+ - name: chave
+ description: Chave
+ - name: cobertura_temporal
+ description: Cobertura Temporal
+ - name: valor
+ description: Valor
diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql
index ea7549c6..6be9f640 100644
--- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql
+++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql
@@ -6,7 +6,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 1945, "end": 2022, "interval": 1},
+            "range": {"start": 1945, "end": 2025, "interval": 1},
},
)
}}
@@ -15,7 +15,7 @@ select
safe_cast(turno as int64) turno,
safe_cast(id_eleicao as string) id_eleicao,
safe_cast(tipo_eleicao as string) tipo_eleicao,
- safe_cast(data_eleicao as string) data_eleicao,
+ safe_cast(data_eleicao as date) data_eleicao,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(id_municipio_tse as string) id_municipio_tse,
diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql
index b11e3dab..e6d6f982 100644
--- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql
+++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql
@@ -6,7 +6,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 1998, "end": 2022, "interval": 2},
+            "range": {"start": 1994, "end": 2026, "interval": 2},
},
cluster_by=["sigla_uf"],
)
@@ -16,7 +16,7 @@ select
safe_cast(turno as int64) turno,
safe_cast(id_eleicao as string) id_eleicao,
safe_cast(tipo_eleicao as string) tipo_eleicao,
- safe_cast(data_eleicao as string) data_eleicao,
+ safe_cast(data_eleicao as date) data_eleicao,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(id_municipio_tse as string) id_municipio_tse,
diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql
index 76798436..b2b14d25 100644
--- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql
+++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql
@@ -6,7 +6,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 1998, "end": 2022, "interval": 2},
+            "range": {"start": 1994, "end": 2026, "interval": 2},
},
cluster_by=["sigla_uf"],
)
@@ -16,7 +16,7 @@ select
safe_cast(turno as int64) turno,
safe_cast(id_eleicao as string) id_eleicao,
safe_cast(tipo_eleicao as string) tipo_eleicao,
- safe_cast(data_eleicao as string) data_eleicao,
+ safe_cast(data_eleicao as date) data_eleicao,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(id_municipio_tse as string) id_municipio_tse,
diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql
index 5b13d924..f93eb9fd 100644
--- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql
+++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql
@@ -6,7 +6,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 1994, "end": 2022, "interval": 2},
+            "range": {"start": 1994, "end": 2026, "interval": 2},
},
cluster_by=["sigla_uf"],
)
@@ -17,7 +17,7 @@ select
safe_cast(turno as int64) turno,
safe_cast(id_eleicao as string) id_eleicao,
safe_cast(tipo_eleicao as string) tipo_eleicao,
- safe_cast(data_eleicao as string) data_eleicao,
+ safe_cast(data_eleicao as date) data_eleicao,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(id_municipio_tse as string) id_municipio_tse,
diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql
index 4dfa9e05..16fe21e2 100644
--- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql
+++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql
@@ -6,7 +6,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
- "range": {"start": 1994, "end": 2022, "interval": 2},
+            "range": {"start": 1994, "end": 2026, "interval": 2},
},
cluster_by=["sigla_uf"],
)
@@ -17,7 +17,7 @@ select
safe_cast(turno as int64) turno,
safe_cast(id_eleicao as string) id_eleicao,
safe_cast(tipo_eleicao as string) tipo_eleicao,
- safe_cast(data_eleicao as string) data_eleicao,
+ safe_cast(data_eleicao as date) data_eleicao,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(id_municipio_tse as string) id_municipio_tse,
diff --git a/models/br_tse_eleicoes/schema.yml b/models/br_tse_eleicoes/schema.yml
index b72411f2..a4a2015b 100644
--- a/models/br_tse_eleicoes/schema.yml
+++ b/models/br_tse_eleicoes/schema.yml
@@ -1174,7 +1174,7 @@ models:
- numero_candidato
- nome_candidato
- not_null_proportion_multiple_columns:
- at_least: 0.95
+ at_least: 0.80
columns:
- name: ano
description: Ano
@@ -1186,14 +1186,27 @@ models:
description: Cargo
- name: data_eleicao
description: Data da eleição
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
- name: id_candidato_bd
description: ID Candidato - Base dos Dados
- name: id_eleicao
description: ID Eleição
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
- name: id_municipio_tse
description: ID Município - TSE
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio_tse
+ ignore_values: ['73709']
- name: nome_candidato
description: Nome do candidato
- name: numero_candidato
@@ -1208,6 +1221,11 @@ models:
description: Sigla do partido
- name: sigla_uf
description: Sigla da unidade da federação
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ ignore_values: [GB, GP, RB]
- name: tipo_eleicao
description: Tipo da eleição
- name: turno
@@ -1222,11 +1240,13 @@ models:
- ano
- turno
- id_eleicao
+ - sigla_uf
- id_municipio_tse
+ - cargo
- sequencial_candidato
- numero_candidato
- not_null_proportion_multiple_columns:
- at_least: 0.95
+ at_least: 0.80
columns:
- name: ano
description: Ano
@@ -1238,14 +1258,27 @@ models:
description: Cargo
- name: data_eleicao
description: Data da eleição
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
- name: id_candidato_bd
description: ID Candidato - Base dos Dados
- name: id_eleicao
description: ID Eleição
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
- name: id_municipio_tse
description: ID Município - TSE
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio_tse
+ ignore_values: ['73709']
- name: numero_candidato
description: Número do candidato
- name: numero_partido
@@ -1258,6 +1291,11 @@ models:
description: Sigla do partido
- name: sigla_uf
description: Sigla da unidade da federação
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ ignore_values: [GB, GP, RB]
- name: tipo_eleicao
description: Tipo da eleição
- name: turno
@@ -1272,12 +1310,14 @@ models:
- ano
- turno
- id_eleicao
+ - sigla_uf
- id_municipio_tse
+ - cargo
- zona
- sequencial_candidato
- numero_candidato
- not_null_proportion_multiple_columns:
- at_least: 0.95
+ at_least: 0.80
columns:
- name: ano
description: Ano
@@ -1289,14 +1329,27 @@ models:
description: Cargo
- name: data_eleicao
description: Data da eleição
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
- name: id_candidato_bd
description: ID Candidato - Base dos Dados
- name: id_eleicao
description: ID Eleição
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
- name: id_municipio_tse
description: ID Município - TSE
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio_tse
+ ignore_values: ['73709']
- name: numero_candidato
description: Número do candidato
- name: numero_partido
@@ -1309,6 +1362,11 @@ models:
description: Sigla do partido
- name: sigla_uf
description: Sigla da unidade da federação
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ ignore_values: [GB, GP, RB]
- name: tipo_eleicao
description: Tipo da eleição
- name: turno
@@ -1383,7 +1441,7 @@ models:
- cargo
- numero_partido
- not_null_proportion_multiple_columns:
- at_least: 0.95
+ at_least: 0.90
columns:
- name: ano
description: Ano
@@ -1395,18 +1453,36 @@ models:
description: Cargo
- name: data_eleicao
description: Data da eleição
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
- name: id_eleicao
description: ID Eleição
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
- name: id_municipio_tse
description: ID Município - TSE
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio_tse
+ ignore_values: ['73709']
- name: numero_partido
description: Número do partido
- name: sigla_partido
description: Sigla do partido
- name: sigla_uf
description: Sigla da unidade da federação
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ ignore_values: [GB, GP, RB]
- name: tipo_eleicao
description: Tipo da eleição
- name: turno
@@ -1428,7 +1504,7 @@ models:
- cargo
- numero_partido
- not_null_proportion_multiple_columns:
- at_least: 0.95
+ at_least: 0.90
columns:
- name: ano
description: Ano
@@ -1440,18 +1516,36 @@ models:
description: Cargo
- name: data_eleicao
description: Data da eleição
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_data_tempo__data')
+ field: data.data
- name: id_eleicao
description: ID Eleição
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
+ tests:
+ - relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio
- name: id_municipio_tse
description: ID Município - TSE
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__municipio')
+ field: id_municipio_tse
+ ignore_values: ['73709']
- name: numero_partido
description: Número do partido
- name: sigla_partido
description: Sigla do partido
- name: sigla_uf
description: Sigla da unidade da federação
+ tests:
+ - custom_relationships:
+ to: ref('br_bd_diretorios_brasil__uf')
+ field: sigla
+ ignore_values: [GB, GP, RB]
- name: tipo_eleicao
description: Tipo da eleição
- name: turno
diff --git a/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql
new file mode 100644
index 00000000..32c0d22e
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql
@@ -0,0 +1,51 @@
+{{
+    config(
+        schema="br_tse_filiacao_partidaria",
+        alias="microdados",
+        materialized="table",
+        unique_key="registro_filiacao",
+        partition_by={
+            "field": "data_extracao",
+            "data_type": "date",
+        },
+        cluster_by=["sigla_uf"],
+    )
+}}
+with
+    typed_rows as (  -- rename and cast every staging column
+        select
+            safe_cast(sqregistrofiliacao as string) as registro_filiacao,
+            safe_cast(sgpartido as string) as sigla_partido,
+            safe_cast(sigla_uf as string) as sigla_uf,
+            safe_cast(id_municipio as string) as id_municipio,
+            safe_cast(codlocalidadetse as string) as id_municipio_tse,
+            safe_cast(numzona as string) as zona,
+            safe_cast(numsecao as string) as secao,
+            safe_cast(nrtituloeleitor as string) as titulo_eleitor,
+            safe_cast(numcpf as string) as cpf,
+            safe_cast(nmeleitor as string) as nome,
+            safe_cast(nmsocialeleitor as string) as nome_social,
+            safe_cast(tpsexo as string) as sexo,
+            safe_cast(dessituacaoeleitor as string) as situacao_registro,
+            safe_cast(cdmotivodesfiliacao as string) as motivo_desfiliacao,
+            safe_cast(cdmotivocancelamento as string) as motivo_cancelamento,
+            safe_cast(indorigem as string) as indicador_origem,
+            safe_cast(dtfiliacao as date) as data_filiacao,
+            safe_cast(dtdesfiliacao as date) as data_desfiliacao,
+            safe_cast(tscadastrodesfiliacao as date) as data_cadastro_desfiliacao,
+            safe_cast(dtcancelamento as date) as data_cancelamento,
+            safe_cast(dtexclusao as date) as data_exclusao,
+            safe_cast(data_extracao as date) as data_extracao,
+        from `basedosdados-staging.br_tse_filiacao_partidaria_staging.microdados`
+    ),
+    ranked_rows as (  -- rank 1 = latest data_extracao per registro_filiacao
+        select
+            *,
+            row_number() over (
+                partition by registro_filiacao order by data_extracao desc
+            ) as extraction_rank
+        from typed_rows
+    )
+select * except (extraction_rank)  -- keep a single row per registro_filiacao
+from ranked_rows
+where extraction_rank = 1
diff --git a/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql
new file mode 100644
index 00000000..0edea6d0
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql
@@ -0,0 +1,27 @@
+{{
+    config(
+        schema="br_tse_filiacao_partidaria",
+        alias="microdados_antigos",
+        materialized="table",
+        cluster_by=["sigla_uf"],
+    )
+}}
+-- Typed view of the legacy staging table; date columns go through validate_date_range.
+select
+    safe_cast(sigla_partido as string) as sigla_partido,
+    safe_cast(sigla_uf as string) as sigla_uf,
+    safe_cast(id_municipio as string) as id_municipio,
+    safe_cast(id_municipio_tse as string) as id_municipio_tse,
+    safe_cast(zona as int64) as zona,
+    safe_cast(secao as int64) as secao,
+    safe_cast(titulo_eleitoral as string) as titulo_eleitoral,
+    safe_cast(nome as string) as nome,
+    {{ validate_date_range("data_filiacao", "1980-01-01") }} as data_filiacao,
+    safe_cast(situacao_registro as string) as situacao_registro,
+    safe_cast(tipo_registro as string) as tipo_registro,
+    {{ validate_date_range("data_processamento", "1980-01-01") }} as data_processamento,
+    {{ validate_date_range("data_desfiliacao", "1980-01-01") }} as data_desfiliacao,
+    {{ validate_date_range("data_cancelamento", "1980-01-01") }} as data_cancelamento,
+    {{ validate_date_range("data_regularizacao", "1980-01-01") }} as data_regularizacao,
+    safe_cast(motivo_cancelamento as string) as motivo_cancelamento,
+from `basedosdados-staging.br_tse_filiacao_partidaria_staging.microdados_antigos`
diff --git a/models/br_tse_filiacao_partidaria/schema.yml b/models/br_tse_filiacao_partidaria/schema.yml
new file mode 100644
index 00000000..0afb543e
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/schema.yml
@@ -0,0 +1,166 @@
+---
+version: 2
+models:
+ - name: br_tse_filiacao_partidaria__microdados_antigos  # dbt model: older ("antigos") TSE party-affiliation microdata
+   description: Microdados antigos de filiação partidária do TSE.
+   tests:
+     - custom_not_null_proportion_multiple_columns:
+         at_least: 0.10  # minimum share of non-null rows required of each checked column
+         ignore_values: [data_regularizacao]  # columns left out of the non-null check
+   columns:
+     - name: sigla_partido
+       description: Sigla do partido
+     - name: sigla_uf
+       description: Sigla da unidade da federação
+       tests:
+         - custom_relationships:
+             to: ref('br_bd_diretorios_brasil__uf')
+             field: sigla
+             ignore_values: [ZZ]  # presumably values exempt from the lookup - confirm against the custom_relationships macro
+     - name: id_municipio
+       description: ID Município - IBGE 7 Dígitos
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_brasil__municipio')
+             field: id_municipio
+     - name: id_municipio_tse
+       description: ID Município - TSE
+     - name: zona
+       description: Zona eleitoral
+     - name: secao
+       description: Seção eleitoral
+     - name: titulo_eleitoral
+       description: Título eleitoral
+     - name: nome
+       description: Nome
+     - name: data_filiacao
+       description: Data da filiação
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: situacao_registro
+       description: Situação do registro
+     - name: tipo_registro
+       description: Tipo de registro
+     - name: data_processamento
+       description: Data de processamento
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_desfiliacao
+       description: Data de desfiliação
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_cancelamento
+       description: Data de cancelamento
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_regularizacao
+       description: Data de regularização
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: motivo_cancelamento
+       description: Motivo de cancelamento
+ - name: br_tse_filiacao_partidaria__microdados  # dbt model: TSE party-affiliation microdata
+   description: Microdados de filiação partidária do TSE.
+   tests:
+     - custom_not_null_proportion_multiple_columns:
+         at_least: 0.55  # minimum share of non-null rows required of each checked column
+         ignore_values:  # columns left out of the non-null check
+           - data_desfiliacao
+           - data_exclusao
+           - data_cadastro_desfiliacao
+           - motivo_desfiliacao
+           - motivo_cancelamento
+           - data_cancelamento
+           - nome_social
+     - dbt_utils.unique_combination_of_columns:
+         combination_of_columns: [registro_filiacao]  # one row per registro_filiacao
+   columns:
+     - name: registro_filiacao
+       description: Sequência do registro de filiação
+     - name: sigla_partido
+       description: Sigla do partido
+     - name: sigla_uf
+       description: Sigla da unidade da federação
+       tests:
+         - custom_relationships:
+             to: ref('br_bd_diretorios_brasil__uf')
+             field: sigla
+             ignore_values: [ZZ]  # presumably values exempt from the lookup - confirm against the custom_relationships macro
+     - name: id_municipio
+       description: ID Município - IBGE 7 Dígitos
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_brasil__municipio')
+             field: id_municipio
+     - name: id_municipio_tse
+       description: ID Município - TSE
+     - name: zona
+       description: Zona eleitoral
+     - name: secao
+       description: Seção eleitoral
+     - name: titulo_eleitor
+       description: Título de eleitor
+     - name: cpf
+       description: Cadastro de pessoa física
+     - name: nome
+       description: Nome do eleitor
+     - name: nome_social
+       description: Nome social do eleitor
+     - name: sexo
+       description: Sexo
+     - name: situacao_registro
+       description: Situação do registro
+     - name: motivo_desfiliacao
+       description: Código do motivo da desfiliação. Não foi encontrada na documentação
+         a tradução deste código
+     - name: motivo_cancelamento
+       description: Código do motivo de cancelamento. Não foi encontrada na documentação
+         a tradução deste código
+     - name: indicador_origem
+       description: Não foi encontrada uma definição na documentação para este campo
+     - name: data_filiacao
+       description: Data da filiação
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_desfiliacao
+       description: Data de desfiliação
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_cadastro_desfiliacao
+       description: Data do cadastro da desfiliação
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_cancelamento
+       description: Data de cancelamento
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_exclusao
+       description: Data de exclusão
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
+     - name: data_extracao
+       description: Data de extração da linha
+       tests:
+         - relationships:
+             to: ref('br_bd_diretorios_data_tempo__data')
+             field: data.data
diff --git a/tests/generic/custom_null_proportion_multiple_columns.sql b/tests/generic/custom_null_proportion_multiple_columns.sql
new file mode 100644
index 00000000..e3a3bde2
--- /dev/null
+++ b/tests/generic/custom_null_proportion_multiple_columns.sql
@@ -0,0 +1,71 @@
+{% test custom_not_null_proportion_multiple_columns(
+    model, ignore_values=[], at_least=0.05
+) %}
+    {#-
+        Fails once for every column of `model` whose share of non-null rows is
+        below `at_least` (a fraction between 0 and 1). Columns listed in
+        `ignore_values` are not checked. The proportions are queried while the
+        test is compiled so that each failing column can be logged by name.
+    -#}
+    {%- set columns = adapter.get_columns_in_relation(model) -%}
+    {#- Resolve the checked column names up front so that both loops below
+        iterate over exactly the same list. -#}
+    {%- set checked = columns | map(attribute="name") | reject("in", ignore_values) | list -%}
+    {% set suffix = "_nulls" %}
+    {% set pivot_columns_query %}
+        with
+            null_counts as (
+                select
+                    {% for name in checked %}
+                        sum(case when {{ name }} is null then 1 else 0 end) as {{ name }}{{ suffix }},
+                    {% endfor %}
+                    count(*) as total_records
+                from {{ model }}
+            ),
+
+            pivot_columns as (
+                {# Typed, always-empty seed branch: every real branch can start with
+                   `union all`, and the CTE stays valid when no column is checked. #}
+                select
+                    cast(null as string) as column_name,
+                    cast(null as int64) as quantity,
+                    total_records
+                from null_counts
+                where false
+                {% for name in checked %}
+                    union all
+                    select '{{ name }}', {{ name }}{{ suffix }}, total_records
+                    from null_counts
+                {% endfor %}
+            ),
+
+            faulty_columns as (
+                select *
+                from pivot_columns
+                {# safe_divide returns NULL for an empty model instead of raising a
+                   division-by-zero error, so no column is flagged in that case. #}
+                where safe_divide(quantity, total_records) > (1 - {{ at_least }})
+            )
+
+        select *
+        from faulty_columns
+    {% endset %}
+    {%- set errors = dbt_utils.get_query_results_as_dict(pivot_columns_query) -%}
+    {#- Default to an empty tuple: the dict has no keys when nothing was executed. -#}
+    {%- set failing = errors.get("column_name", ()) | unique | list -%}
+    {% if failing %}
+        with
+            validation_errors as (
+                {% for e in failing %}
+                    {{ log("LOG: Coluna com preenchimento menor que " ~ at_least * 100 ~ "% ---> " ~ e ~ " [FAIL]", info=True) }}
+                    select '{{ e }}' as column
+                    {% if not loop.last %}union all{% endif %}
+                {% endfor %}
+            )
+
+        select * from validation_errors
+    {% else %}
+        {# Nothing failed: return zero rows so that the test passes. #}
+        with validation_errors as (select 1 as column) select * from validation_errors where column != 1
+    {% endif %}
+{% endtest %}