diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..ccfdd206 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,63 @@ + +# Template Pull Requests - Queries-basedosdados + +## Nomeação do Pull Request + +A nomeação de cada Pull Request (PR) deve seguir o seguinte padrão: + +- O título de cada Pull Request (PR) deve começar com uma das seguintes palavras-chave, entre colchetes. Além disso, **marque a palavra-chave que melhor descreve o seu PR atual**: + - [ ] **[Dbt]**: Para subida de novos dados em produção. + - [ ] **[Feature]**: Para novas funcionalidades. + - [ ] **[Table-Approve]**: Para rodar apenas o Table-Approve. + - [ ] **[Bugfix]**: Para correções de bugs. + - [ ] **[Refactor]**: Para mudanças no código que não alteram a funcionalidade. + - [ ] **[Docs]**: Para atualizações na documentação. + - [ ] **[Test]**: Para mudanças relacionadas a testes. + - [ ] **[Chore]**: Para tarefas menores e de manutenção. + +--- + - Exemplos de título: + - **[Docs] br_me_caged** + - **[Feature] br_cgu_servidores_publicos** + +## Draft: +- Ao abrir o PR, você deverá colocá-lo como draft. + + +## Descrição do PR: +- Explique de maneira clara e concisa o objetivo deste PR. O que foi alterado? Qual o problema que ele resolve? + - **Motivação/Contexto:** + + +## Detalhes Técnicos: +- Detalhe as mudanças mais técnicas, como ajustes no dbt, scripts ou modelo de dados utilizado. + - **Principais alterações no dbt/scripts:** + - **Mudanças nos dados e no schema:** + - **Impacto no desempenho:** + +- Se alguma parte do código precisar de alguma atenção a mais, comente na linha sinalizando para os revisores. 
+ +## Teste e Validações: + +- Relate os testes e validações relacionados aos dados/script: + - [ ] Testado em `queries-basedosdados-dev` + - [ ] **Decisões relacionadas aos testes:** + + - **Caso haja algo relacionado aos testes que valha a pena informar:** + +## Riscos e Mitigações: +- Identifique os riscos potenciais desta mudança e como mitigar esses riscos. + + - Riscos conhecidos: + - Planos de rollback: + +## Dependências: +- Liste quaisquer dependências externas, como bibliotecas, outros PRs ou mudanças que precisam ser feitas antes deste merge. + - [ ] Dependências: + - [ ] Nenhuma dependência adicional + + +## Revisores: +- Quando o PR estiver pronto para ser revisado, retire o **Draft** através do **Ready for review**, marque os revisores do repositório, envie o PR no nosso [Discord](https://discord.gg/V3yTWRYWZZ) na aba **Correções de PRs, arquiteturas e afins** e marque a **@equipe_dados**: + - Revisores recomendados no GitHub: + - basedosdados/dados diff --git a/dbt_project.yml b/dbt_project.yml index a9d1f3bf..97933eb0 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,400 +1,409 @@ ---- -# Name your project! Project names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: basedosdados -version: 1.0.0 -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. -profile: default -# Options to disable elementary models -vars: - disable_run_results: false - disable_tests_results: false - disable_dbt_artifacts_autoupload: true - disable_dbt_invocation_autoupload: true -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that models in this project can be -# found in the "models/" directory. You probably won't need to change these! 
-model-paths: [models] -analysis-paths: [analysis] -test-paths: [tests] -seed-paths: [data] -macro-paths: [macros] -snapshot-paths: [snapshots] -target-path: target # directory which will store compiled SQL files -clean-targets: # directories to be removed by `dbt clean` - - target - - dbt_modules -# Grant acess -# bq data control: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language -# dbt grant statements use https://discourse.getdbt.com/t/the-exact-grant-statements-we-use-in-a-dbt-project/430 -# Configuring models -# Full documentation: https://docs.getdbt.com/docs/configuring-models -# In this example config, we tell dbt to build all models in the example/ directory -# as tables. These settings can be overridden in the individual model files -# using the `{{ config(...) }}` macro. -models: - +post-hook: GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "specialGroup:allUsers" - +persist_docs: - relation: true - columns: true - basedosdados: - # Config indicated by + and applies to all files under models/example/ - br_anatel_banda_larga_fixa: - +materialized: table - +schema: br_anatel_banda_larga_fixa - br_anatel_telefonia_movel: - +materialized: table # Materialization type (table, view or incremental) - +schema: br_anatel_telefonia_movel - br_anp_precos_combustiveis: - +materialized: table - +schema: br_anp_precos_combustiveis - br_ans_beneficiario: - +materialized: incremental - +schema: br_ans_beneficiario - br_b3_cotacoes: - +materialized: table - +schema: br_b3_cotacoes - br_bcb_agencia: - +materialized: table - +schema: br_bcb_agencia - br_bcb_estban: - +materialized: table - +schema: br_bcb_estban - br_bcb_sicor: - +materialized: table - +schema: br_bcb_sicor - br_bcb_taxa_cambio: - +materialized: table - +schema: br_bcb_taxa_cambio - +post-hook: - - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO 
"group:bd-pro@basedosdados.org" - br_bcb_taxa_selic: - +materialized: table - +schema: br_bcb_taxa_selic - +post-hook: - - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" - br_bd_diretorios_brasil: - +materialized: table - +schema: br_bd_diretorios_brasil - br_bd_diretorios_mundo: - +materialized: table - +schema: br_bd_diretorios_mundo - br_bd_indicadores: - +materialized: table - +schema: br_bd_indicadores - br_bd_metadados: - +materialized: table - +schema: br_bd_metadados - br_bd_siga_o_dinheiro: - +materialized: table - +schema: br_bd_siga_o_dinheiro - br_camara_dados_abertos: - +materialized: table - +schema: br_camara_dados_abertos - br_ce_fortaleza_sefin_iptu: - +materialized: table - +schema: br_ce_fortaleza_sefin_iptu - +post-hook: - - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" - br_cgu_beneficios_cidadao: - +materialized: table - +schema: br_cgu_beneficios_cidadao - br_cgu_cartao_pagamento: - +materialized: table - +schema: br_cgu_cartao_pagamento - br_cgu_dados_abertos: - +materialized: table - +schema: br_cgu_dados_abertos - br_cgu_emendas_parlamentares: - +materialized: table - +schema: br_cgu_emendas_parlamentares - br_cgu_pessoal_executivo_federal: - +materialized: table - +schema: br_cgu_pessoal_executivo_federal - br_cgu_servidores_executivo_federal: - +materialized: table - +schema: br_cgu_servidores_executivo_federal - br_cnj_improbidade_administrativa: - +materialized: table - +schema: br_cnj_improbidade_administrativa - br_cnpq_bolsas: - +materialized: table - +schema: br_cnpq_bolsas - br_cvm_administradores_carteira: - +materialized: table - +schema: br_cvm_administradores_carteira - br_cvm_fi: - +materialized: table - +schema: br_cvm_fi - +post-hook: - - REVOKE 
`roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" - br_cvm_oferta_publica_distribuicao: - +materialized: table - +schema: br_cvm_oferta_publica_distribuicao - br_datahackers_state_data: - +materialized: table - +schema: br_datahackers_state_data - br_denatran_frota: - +materialized: table - +schema: br_denatran_frota - br_fbsp_absp: - +materialized: table - +schema: br_fbsp_absp - br_fgv_igp: - +materialized: table - +schema: br_fgv_igp - br_geobr_mapas: - +materialized: table - +schema: br_geobr_mapas - br_ibge_censo_2022: - +materialized: table - +schema: br_ibge_censo_2022 - br_ibge_censo_demografico: - +materialized: table - +schema: br_ibge_censo_demografico - br_ibge_estadic: - +materialized: table - +schema: br_ibge_estadic - br_ibge_inpc: - +materialized: table - +schema: br_ibge_inpc - br_ibge_ipca: - +materialized: table - +schema: br_ibge_ipca - br_ibge_ipca15: - +materialized: table - +schema: br_ibge_ipca15 - br_ibge_pam: - +materialized: table - +schema: br_ibge_pam - br_ibge_pevs: - +materialized: table - +schema: br_ibge_pevs - br_ibge_pib: - +materialized: table - +schema: br_ibge_pib - br_ibge_pnad: - +materialized: table - +schema: br_ibge_pnad - br_ibge_pnad_covid: - +materialized: table - +schema: br_ibge_pnad_covid - br_ibge_pnadc: - +materialized: table - +schema: br_ibge_pnadc - br_ibge_pof: - +materialized: table - +schema: br_ibge_pof - br_ibge_ppm: - +materialized: table - +schema: br_ibge_ppm - br_inep_ana: - +materialized: table - +schema: br_inep_ana - br_inep_censo_educacao_superior: - +materialized: table - +schema: br_inep_censo_educacao_superior - br_inep_censo_escolar: - +materialized: table - +schema: br_inep_censo_escolar - br_inep_enem: - +materialized: table - +schema: br_inep_enem - br_inep_formacao_docente: - +materialized: table - +schema: br_inep_formacao_docente - br_inep_ideb: - +materialized: table - 
+schema: br_inep_ideb - br_inep_indicador_nivel_socioeconomico: - +materialized: table - +schema: br_inep_indicador_nivel_socioeconomico - br_inep_indicadores_educacionais: - +materialized: table - +schema: br_inep_indicadores_educacionais - br_inep_saeb: - +materialized: table - +schema: br_inep_saeb - br_inep_sinopse_estatistica_educacao_basica: - +materialized: table - +schema: br_inep_sinopse_estatistica_educacao_basica - br_inmet_bdmep: - +materialized: table - +schema: br_inmet_bdmep - br_inpe_prodes: - +materialized: table - +schema: br_inpe_prodes - br_inpe_queimadas: - +materialized: table - +schema: br_inpe_queimadas - br_inpe_sisam: - +materialized: table - +schema: br_inpe_sisam - br_ipea_avs: - +materialized: table - +schema: br_ipea_avs - br_jota: - +materialized: table - +schema: br_jota - br_mdr_snis: - +materialized: table - +schema: br_mdr_snis - br_me_caged: - +materialized: table - +schema: br_me_caged - br_me_cno: - +materialized: table - +schema: br_me_cno - br_me_cnpj: - +materialized: table - +schema: br_me_cnpj - br_me_comex_stat: - +materialized: table - +schema: br_me_comex_stat - br_me_rais: - +materialized: table - +schema: br_me_rais - br_me_sic: - +materialized: table - +schema: br_me_sic - br_me_siconfi: - +materialized: table - +schema: br_me_siconfi - br_mec_prouni: - +materialized: table - +schema: br_mec_prouni - br_mec_sisu: - +materialized: table - +schema: br_mec_sisu - br_mercadolivre_ofertas: - +materialized: table - +schema: br_mercadolivre_ofertas - +post-hook: - - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" - br_mg_belohorizonte_smfa_iptu: - +materialized: table - +schema: br_mg_belohorizonte_smfa_iptu - br_mme_consumo_energia_eletrica: - +materialized: table - +schema: br_mme_consumo_energia_eletrica - br_mp_pep: - +materialized: table - +schema: br_mp_pep - br_ms_cnes: - +materialized: 
table - +schema: br_ms_cnes - br_ms_pns: - +materialized: table - +schema: br_ms_pns - br_ms_sia: - +materialized: table - +schema: br_ms_sia - br_ms_sih: - +materialized: table - +schema: br_ms_sih - br_ms_sim: - +materialized: table - +schema: br_ms_sim - br_ms_sinan: - +materialized: table - +schema: br_ms_sinan - br_ms_sinasc: - +materialized: table - +schema: br_ms_sinasc - br_ms_sisvan: - +materialized: table - +schema: br_ms_sisvan - br_ms_vacinacao_covid19: - +materialized: table - +schema: br_ms_vacinacao_covid19 - br_ons_avaliacao_operacao: - +materialized: table - +schema: br_ons_avaliacao_operacao - br_ons_estimativa_custos: - +materialized: table - +schema: br_ons_estimativa_custos - br_poder360_pesquisas: - +materialized: table - +schema: br_poder360_pesquisas - br_rf_arrecadacao: - +materialized: table - +schema: br_rf_arrecadacao - br_rf_cafir: - +materialized: table - +schema: br_rf_cafir - br_rf_cno: - +materialized: table - +schema: br_rf_cno - br_rj_isp_estatisticas_seguranca: - +materialized: table - +schema: br_rj_isp_estatisticas_seguranca - br_sp_saopaulo_dieese_icv: - +materialized: table - +schema: br_sp_saopaulo_dieese_icv - br_sp_saopaulo_geosampa_iptu: - +materialized: table - +schema: br_sp_saopaulo_geosampa_iptu - +post-hook: - - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" - - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" - br_stf_corte_aberta: - +materialized: table - +schema: br_stf_corte_aberta - br_trase_supply_chain: - +materialized: table - +schema: br_trase_supply_chain - br_tse_eleicoes: - +materialized: table - +schema: br_tse_eleicoes - br_tse_eleicoes_2022: - +materialized: table - +schema: br_tse_eleicoes_2022 - example: - +materialized: table # Materialization type (table, table or incremental) - +schema: example # Overrides the default schema (defaults to what is set on profiles.yml) - fundacao_lemann: - +materialized: table - +schema: 
fundacao_lemann - mundo_bm_wdi: - +materialized: table - +schema: mundo_bm_wdi - mundo_transfermarkt_competicoes: - +materialized: table - +schema: mundo_transfermarkt_competicoes - mundo_transfermarkt_competicoes_internacionais: - +materialized: table - +schema: mundo_transfermarkt_competicoes_internacionais - test_dataset: - +materialized: table - +schema: test_dataset - world_ampas_oscar: - +materialized: table - +schema: world_ampas_oscar - world_iea_pirls: - +materialized: table - +schema: world_iea_pirls - world_oceanos_mapeamento: - +materialized: table - +schema: world_oceanos_mapeamento - world_oecd_public_finance: - +materialized: table - +schema: world_oecd_public_finance - world_olympedia_olympics: - +materialized: table - +schema: world_olympedia_olympics - world_wb_mides: - +materialized: table - +schema: world_wb_mides - world_wwf_hydrosheds: - +materialized: table - +schema: world_wwf_hydrosheds - elementary: - +schema: elementary +--- +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: basedosdados +version: 1.0.0 +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: default +# Options to disable elementary models +vars: + disable_run_results: false + disable_tests_results: false + disable_dbt_artifacts_autoupload: true + disable_dbt_invocation_autoupload: true +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! 
+model-paths: [models] +analysis-paths: [analysis] +test-paths: [tests] +seed-paths: [data] +macro-paths: [macros] +snapshot-paths: [snapshots] +target-path: target # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - target + - dbt_modules +# Grant acess +# bq data control: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language +# dbt grant statements use https://discourse.getdbt.com/t/the-exact-grant-statements-we-use-in-a-dbt-project/430 +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models +# In this example config, we tell dbt to build all models in the example/ directory +# as tables. These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + +post-hook: GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "specialGroup:allUsers" + +persist_docs: + relation: true + columns: true + basedosdados: + # Config indicated by + and applies to all files under models/example/ + br_anatel_banda_larga_fixa: + +materialized: table + +schema: br_anatel_banda_larga_fixa + br_anatel_telefonia_movel: + +materialized: table # Materialization type (table, view or incremental) + +schema: br_anatel_telefonia_movel + br_anp_precos_combustiveis: + +materialized: table + +schema: br_anp_precos_combustiveis + br_ans_beneficiario: + +materialized: incremental + +schema: br_ans_beneficiario + br_b3_cotacoes: + +materialized: table + +schema: br_b3_cotacoes + br_bcb_agencia: + +materialized: table + +schema: br_bcb_agencia + br_bcb_estban: + +materialized: table + +schema: br_bcb_estban + br_bcb_sicor: + +materialized: table + +schema: br_bcb_sicor + br_bcb_taxa_cambio: + +materialized: table + +schema: br_bcb_taxa_cambio + +post-hook: + - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO 
"group:bd-pro@basedosdados.org" + br_bcb_taxa_selic: + +materialized: table + +schema: br_bcb_taxa_selic + +post-hook: + - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" + br_bd_diretorios_brasil: + +materialized: table + +schema: br_bd_diretorios_brasil + br_bd_diretorios_mundo: + +materialized: table + +schema: br_bd_diretorios_mundo + br_bd_indicadores: + +materialized: table + +schema: br_bd_indicadores + br_bd_metadados: + +materialized: table + +schema: br_bd_metadados + br_bd_siga_o_dinheiro: + +materialized: table + +schema: br_bd_siga_o_dinheiro + br_camara_dados_abertos: + +materialized: table + +schema: br_camara_dados_abertos + br_ce_fortaleza_sefin_iptu: + +materialized: table + +schema: br_ce_fortaleza_sefin_iptu + +post-hook: + - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" + br_cgu_beneficios_cidadao: + +materialized: table + +schema: br_cgu_beneficios_cidadao + br_cgu_cartao_pagamento: + +materialized: table + +schema: br_cgu_cartao_pagamento + br_cgu_dados_abertos: + +materialized: table + +schema: br_cgu_dados_abertos + br_cgu_emendas_parlamentares: + +materialized: table + +schema: br_cgu_emendas_parlamentares + br_cgu_pessoal_executivo_federal: + +materialized: table + +schema: br_cgu_pessoal_executivo_federal + br_cgu_servidores_executivo_federal: + +materialized: table + +schema: br_cgu_servidores_executivo_federal + br_cnj_improbidade_administrativa: + +materialized: table + +schema: br_cnj_improbidade_administrativa + br_cnpq_bolsas: + +materialized: table + +schema: br_cnpq_bolsas + br_cvm_administradores_carteira: + +materialized: table + +schema: br_cvm_administradores_carteira + br_cvm_fi: + +materialized: table + +schema: br_cvm_fi + +post-hook: + - REVOKE 
`roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" + br_cvm_oferta_publica_distribuicao: + +materialized: table + +schema: br_cvm_oferta_publica_distribuicao + br_datahackers_state_data: + +materialized: table + +schema: br_datahackers_state_data + br_denatran_frota: + +materialized: table + +schema: br_denatran_frota + br_fbsp_absp: + +materialized: table + +schema: br_fbsp_absp + br_fgv_igp: + +materialized: table + +schema: br_fgv_igp + br_geobr_mapas: + +materialized: table + +schema: br_geobr_mapas + br_ibge_censo_2022: + +materialized: table + +schema: br_ibge_censo_2022 + br_ibge_censo_demografico: + +materialized: table + +schema: br_ibge_censo_demografico + br_ibge_estadic: + +materialized: table + +schema: br_ibge_estadic + br_ibge_inpc: + +materialized: table + +schema: br_ibge_inpc + br_ibge_ipca: + +materialized: table + +schema: br_ibge_ipca + br_ibge_ipca15: + +materialized: table + +schema: br_ibge_ipca15 + br_ibge_pam: + +materialized: table + +schema: br_ibge_pam + br_ibge_pevs: + +materialized: table + +schema: br_ibge_pevs + br_ibge_pib: + +materialized: table + +schema: br_ibge_pib + br_ibge_pnad: + +materialized: table + +schema: br_ibge_pnad + br_ibge_pnad_covid: + +materialized: table + +schema: br_ibge_pnad_covid + br_ibge_pnadc: + +materialized: table + +schema: br_ibge_pnadc + br_ibge_pof: + +materialized: table + +schema: br_ibge_pof + br_ibge_ppm: + +materialized: table + +schema: br_ibge_ppm + br_inep_ana: + +materialized: table + +schema: br_inep_ana + br_inep_censo_educacao_superior: + +materialized: table + +schema: br_inep_censo_educacao_superior + br_inep_censo_escolar: + +materialized: table + +schema: br_inep_censo_escolar + br_inep_educacao_especial: + +materialized: table + +schema: br_inep_educacao_especial + br_inep_enem: + +materialized: table + +schema: br_inep_enem + br_inep_formacao_docente: + 
+materialized: table + +schema: br_inep_formacao_docente + br_inep_ideb: + +materialized: table + +schema: br_inep_ideb + br_inep_indicador_nivel_socioeconomico: + +materialized: table + +schema: br_inep_indicador_nivel_socioeconomico + br_inep_indicadores_educacionais: + +materialized: table + +schema: br_inep_indicadores_educacionais + br_inep_saeb: + +materialized: table + +schema: br_inep_saeb + br_inep_sinopse_estatistica_educacao_basica: + +materialized: table + +schema: br_inep_sinopse_estatistica_educacao_basica + br_inmet_bdmep: + +materialized: table + +schema: br_inmet_bdmep + br_inpe_prodes: + +materialized: table + +schema: br_inpe_prodes + br_inpe_queimadas: + +materialized: table + +schema: br_inpe_queimadas + br_inpe_sisam: + +materialized: table + +schema: br_inpe_sisam + br_ipea_avs: + +materialized: table + +schema: br_ipea_avs + br_jota: + +materialized: table + +schema: br_jota + br_mdr_snis: + +materialized: table + +schema: br_mdr_snis + br_me_caged: + +materialized: table + +schema: br_me_caged + br_me_cno: + +materialized: table + +schema: br_me_cno + br_me_cnpj: + +materialized: table + +schema: br_me_cnpj + br_me_comex_stat: + +materialized: table + +schema: br_me_comex_stat + br_me_rais: + +materialized: table + +schema: br_me_rais + br_me_sic: + +materialized: table + +schema: br_me_sic + br_me_siconfi: + +materialized: table + +schema: br_me_siconfi + br_mec_prouni: + +materialized: table + +schema: br_mec_prouni + br_mec_sisu: + +materialized: table + +schema: br_mec_sisu + br_mercadolivre_ofertas: + +materialized: table + +schema: br_mercadolivre_ofertas + +post-hook: + - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" + br_mg_belohorizonte_smfa_iptu: + +materialized: table + +schema: br_mg_belohorizonte_smfa_iptu + br_mme_consumo_energia_eletrica: + +materialized: table + +schema: 
br_mme_consumo_energia_eletrica + br_mp_pep: + +materialized: table + +schema: br_mp_pep + br_ms_cnes: + +materialized: table + +schema: br_ms_cnes + br_ms_pns: + +materialized: table + +schema: br_ms_pns + br_ms_sia: + +materialized: table + +schema: br_ms_sia + br_ms_sih: + +materialized: table + +schema: br_ms_sih + br_ms_sim: + +materialized: table + +schema: br_ms_sim + br_ms_sinan: + +materialized: table + +schema: br_ms_sinan + br_ms_sinasc: + +materialized: table + +schema: br_ms_sinasc + br_ms_sisvan: + +materialized: table + +schema: br_ms_sisvan + br_ms_vacinacao_covid19: + +materialized: table + +schema: br_ms_vacinacao_covid19 + br_ons_avaliacao_operacao: + +materialized: table + +schema: br_ons_avaliacao_operacao + br_ons_estimativa_custos: + +materialized: table + +schema: br_ons_estimativa_custos + br_poder360_pesquisas: + +materialized: table + +schema: br_poder360_pesquisas + br_rf_arrecadacao: + +materialized: table + +schema: br_rf_arrecadacao + br_rf_cafir: + +materialized: table + +schema: br_rf_cafir + br_rf_cno: + +materialized: table + +schema: br_rf_cno + br_rj_isp_estatisticas_seguranca: + +materialized: table + +schema: br_rj_isp_estatisticas_seguranca + br_sfb_sicar: + +materialized: table + +schema: br_sfb_sicar + br_sp_saopaulo_dieese_icv: + +materialized: table + +schema: br_sp_saopaulo_dieese_icv + br_sp_saopaulo_geosampa_iptu: + +materialized: table + +schema: br_sp_saopaulo_geosampa_iptu + +post-hook: + - REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers" + - GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:bd-pro@basedosdados.org" + br_stf_corte_aberta: + +materialized: table + +schema: br_stf_corte_aberta + br_trase_supply_chain: + +materialized: table + +schema: br_trase_supply_chain + br_tse_eleicoes: + +materialized: table + +schema: br_tse_eleicoes + br_tse_eleicoes_2022: + +materialized: table + +schema: br_tse_eleicoes_2022 + br_tse_filiacao_partidaria: + +materialized: table 
+ +schema: br_tse_filiacao_partidaria + example: + +materialized: table # Materialization type (table, table or incremental) + +schema: example # Overrides the default schema (defaults to what is set on profiles.yml) + fundacao_lemann: + +materialized: table + +schema: fundacao_lemann + mundo_bm_wdi: + +materialized: table + +schema: mundo_bm_wdi + mundo_transfermarkt_competicoes: + +materialized: table + +schema: mundo_transfermarkt_competicoes + mundo_transfermarkt_competicoes_internacionais: + +materialized: table + +schema: mundo_transfermarkt_competicoes_internacionais + test_dataset: + +materialized: table + +schema: test_dataset + world_ampas_oscar: + +materialized: table + +schema: world_ampas_oscar + world_iea_pirls: + +materialized: table + +schema: world_iea_pirls + world_oceanos_mapeamento: + +materialized: table + +schema: world_oceanos_mapeamento + world_oecd_public_finance: + +materialized: table + +schema: world_oecd_public_finance + world_olympedia_olympics: + +materialized: table + +schema: world_olympedia_olympics + world_wb_mides: + +materialized: table + +schema: world_wb_mides + world_wwf_hydrosheds: + +materialized: table + +schema: world_wwf_hydrosheds + elementary: + +schema: elementary diff --git a/macros/validate_date_range.sql b/macros/validate_date_range.sql new file mode 100644 index 00000000..79a441ad --- /dev/null +++ b/macros/validate_date_range.sql @@ -0,0 +1,32 @@
+{% macro validate_date_range(column_name, start_date, end_date=None) %}
+    {#
+        Keep a date only when it lies inside a closed interval; otherwise NULL.
+
+        Args:
+            column_name: column or SQL expression holding the value to validate.
+            start_date: inclusive lower bound, rendered as date('<start_date>').
+            end_date: optional inclusive upper bound, rendered as
+                date('<end_date>'); when omitted the bound is current_date().
+
+        Renders one CASE expression that evaluates to
+        safe_cast(column_name as date) when that value is within the bounds and
+        to NULL otherwise.
+    #}
+    {# Choose the upper bound once so the CASE body is written a single time. #}
+    {% set upper_bound = (
+        "current_date()" if end_date is none else "date('" ~ end_date ~ "')"
+    ) %}
+    case
+        {#
+            The predicate uses the same safe_cast as the result. safe_cast
+            returns NULL for a value it cannot convert, so such a value (and a
+            NULL input) fails the BETWEEN test and falls through to ELSE; no
+            separate NULL arm is needed.
+        #}
+        when
+            safe_cast({{ column_name }} as date)
+            between date('{{ start_date }}') and {{ upper_bound }}
+        then safe_cast({{ column_name }} as date)
+        else null
+    end
+{% endmacro %}
diff --git a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql index e62c7827..77215158 100644 --- a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql +++ b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__curso.sql @@ -5,14 +5,13 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 2009, "end": 2022, "interval": 1}, + "range": {"start": 2009, "end": 2024, "interval": 1}, }, cluster_by="sigla_uf", ) }} select - safe_cast(ano as int64) ano, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, diff --git a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql index 6513fc9a..558f5ecb 100644 --- a/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql +++ b/models/br_inep_censo_educacao_superior/br_inep_censo_educacao_superior__ies.sql @@ -5,11 +5,12 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 2009, "end": 2022, "interval": 1}, + "range": {"start": 2009, "end": 2024, "interval": 1}, }, cluster_by="sigla_uf", ) }} + select safe_cast(ano as int64) ano, safe_cast(sigla_uf as string) sigla_uf, diff --git a/models/br_inep_censo_educacao_superior/schema.yml b/models/br_inep_censo_educacao_superior/schema.yml index 374496c4..88432749 100644 --- a/models/br_inep_censo_educacao_superior/schema.yml +++ b/models/br_inep_censo_educacao_superior/schema.yml @@ -49,18 +49,10 @@ models: description: Rede de Ensino - name: id_ies description: Código da Instituição - tests: - - relationships: - to: ref('br_bd_diretorios_brasil__instituicao_ensino_superior') - field: id_ies - name: nome_curso description: Nome do Curso - name: id_curso description: Código do Curso - 
tests: - - relationships: - to: ref('br_bd_diretorios_brasil__curso_superior') - field: id_curso - name: nome_curso_cine description: Nome do curso, conforme adaptação da Classificação Internacional Normalizada da Educação Cine/Unesco @@ -600,10 +592,6 @@ models: description: Código único de identificação da mantenedora da IES - name: id_ies description: Código único de identificação da IES - tests: - - relationships: - to: ref('br_bd_diretorios_brasil__instituicao_ensino_superior') - field: id_ies - name: nome description: Nome da IES - name: sigla diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql new file mode 100644 index 00000000..b7547236 --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_distorcao_idade_serie.sql @@ -0,0 +1,15 @@ +{{ + config( + alias="brasil_distorcao_idade_serie", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(etapa_ensino as string) etapa_ensino, + safe_cast(tdi as float64) tdi, +from + `basedosdados-staging.br_inep_educacao_especial_staging.brasil_distorcao_idade_serie` + as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql new file mode 100644 index 00000000..953b4c0b --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__brasil_taxa_rendimento.sql @@ -0,0 +1,16 @@ +{{ + config( + alias="brasil_taxa_rendimento", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(etapa_ensino as string) etapa_ensino, + safe_cast(taxa_aprovacao as float64) taxa_aprovacao, + safe_cast(taxa_reprovacao as float64) taxa_reprovacao, + safe_cast(taxa_abandono as float64) 
taxa_abandono, +from + `basedosdados-staging.br_inep_educacao_especial_staging.brasil_taxa_rendimento` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql new file mode 100644 index 00000000..45dc6d5b --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_aee.sql @@ -0,0 +1,21 @@ +{{ + config( + alias="docente_aee", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + safe_cast(quantidade_docente_regente as numeric) quantidade_docente_regente, + safe_cast(quantidade_docente_aee as numeric) quantidade_docente_aee, + safe_cast( + quantidade_docente_regente_formacao_continuada as int64 + ) quantidade_docente_regente_formacao_continuada, + safe_cast( + quantidade_docente_aee_formacao_continuada as int64 + ) quantidade_docente_aee_formacao_continuada, +from `basedosdados-staging.br_inep_educacao_especial_staging.docente_aee` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql new file mode 100644 index 00000000..ba4334ed --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__docente_formacao.sql @@ -0,0 +1,18 @@ +{{ + config( + alias="docente_formacao", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + safe_cast(rede as string) rede, + safe_cast( + quantidade_docente_formacao_continuada as numeric + ) quantidade_docente_formacao_continuada, + +from `basedosdados-staging.br_inep_educacao_especial_staging.docente_formacao` as t diff --git 
a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql similarity index 75% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql rename to models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql index 09c49394..2acac0d8 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino.sql +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__etapa_ensino.sql @@ -1,7 +1,7 @@ {{ config( - alias="educacao_especial_etapa_ensino", - schema="br_inep_sinopse_estatistica_educacao_basica", + alias="etapa_ensino", + schema="br_inep_educacao_especial", materialized="table", partition_by={ "field": "ano", @@ -24,6 +24,4 @@ select end as string ) etapa_ensino, safe_cast(quantidade_matricula as numeric) quantidade_matricula -from - `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_etapa_ensino` - as t +from `basedosdados-staging.br_inep_educacao_especial_staging.etapa_ensino` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql new file mode 100644 index 00000000..f4073d74 --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__faixa_etaria.sql @@ -0,0 +1,21 @@ +{{ + config( + alias="faixa_etaria", + schema="br_inep_educacao_especial", + materialized="table", + partition_by={ + "field": "ano", + "data_type": "int64", + "range": {"start": 2007, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + 
safe_cast(tipo_classe as string) tipo_classe, + safe_cast(faixa_etaria as string) faixa_etaria, + safe_cast(quantidade_matricula as numeric) quantidade_matricula, +from `basedosdados-staging.br_inep_educacao_especial_staging.faixa_etaria` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql new file mode 100644 index 00000000..cf12893a --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__localizacao.sql @@ -0,0 +1,22 @@ +{{ + config( + alias="localizacao", + schema="br_inep_educacao_especial", + materialized="table", + partition_by={ + "field": "ano", + "data_type": "int64", + "range": {"start": 2007, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + safe_cast(tipo_classe as string) tipo_classe, + safe_cast(rede as string) rede, + safe_cast(localizacao as string) localizacao, + safe_cast(quantidade_matricula as numeric) quantidade_matricula, +from `basedosdados-staging.br_inep_educacao_especial_staging.localizacao` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql new file mode 100644 index 00000000..b15b5053 --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__matricula_aee.sql @@ -0,0 +1,16 @@ +{{ + config( + alias="matricula_aee", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(rede as string) rede, + safe_cast(quantidade_matricula as numeric) quantidade_matricula, + safe_cast(quantidade_matricula_aee as numeric) quantidade_matricula_aee, + +from 
`basedosdados-staging.br_inep_educacao_especial_staging.matricula_aee` as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql similarity index 73% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql rename to models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql index 96b61508..c9e3b242 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor.sql +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__sexo_raca_cor.sql @@ -1,7 +1,7 @@ {{ config( - alias="educacao_especial_sexo_raca_cor", - schema="br_inep_sinopse_estatistica_educacao_basica", + alias="sexo_raca_cor", + schema="br_inep_educacao_especial", materialized="table", partition_by={ "field": "ano", @@ -21,6 +21,4 @@ select case when raca_cor = 'Fmarela' then 'Amarela' else raca_cor end as string ) raca_cor, safe_cast(quantidade_matricula as numeric) quantidade_matricula, -from - `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_sexo_raca_cor` - as t +from `basedosdados-staging.br_inep_educacao_especial_staging.sexo_raca_cor` as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql similarity index 71% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql rename to models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql index 8de1398b..17158c1b 100644 --- 
a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino.sql +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__tempo_ensino.sql @@ -1,7 +1,7 @@ {{ config( - alias="educacao_especial_tempo_ensino", - schema="br_inep_sinopse_estatistica_educacao_basica", + alias="tempo_ensino", + schema="br_inep_educacao_especial", materialized="table", partition_by={ "field": "ano", @@ -19,6 +19,4 @@ select safe_cast(rede as string) rede, safe_cast(tempo_ensino as string) tempo_ensino, safe_cast(quantidade_matricula as numeric) quantidade_matricula, -from - `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_tempo_ensino` - as t +from `basedosdados-staging.br_inep_educacao_especial_staging.tempo_ensino` as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql similarity index 75% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql rename to models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql index 988decef..a98caf1a 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia.sql +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__tipo_deficiencia.sql @@ -1,7 +1,7 @@ {{ config( - alias="educacao_especial_tipo_deficiencia", - schema="br_inep_sinopse_estatistica_educacao_basica", + alias="tipo_deficiencia", + schema="br_inep_educacao_especial", materialized="table", partition_by={ "field": "ano", @@ -24,6 +24,4 @@ select end as string ) tipo_deficiencia, safe_cast(quantidade_matricula as numeric) quantidade_matricula, -from - 
`basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_tipo_deficiencia` - as t +from `basedosdados-staging.br_inep_educacao_especial_staging.tipo_deficiencia` as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql new file mode 100644 index 00000000..a4c93bb0 --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_distorcao_idade_serie.sql @@ -0,0 +1,16 @@ +{{ + config( + alias="uf_distorcao_idade_serie", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(etapa_ensino as string) etapa_ensino, + safe_cast(tdi as float64) tdi, +from + `basedosdados-staging.br_inep_educacao_especial_staging.uf_distorcao_idade_serie` + as t diff --git a/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql new file mode 100644 index 00000000..4c8e81d2 --- /dev/null +++ b/models/br_inep_educacao_especial/br_inep_educacao_especial__uf_taxa_rendimento.sql @@ -0,0 +1,16 @@ +{{ + config( + alias="uf_taxa_rendimento", + schema="br_inep_educacao_especial", + materialized="table", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(etapa_ensino as string) etapa_ensino, + safe_cast(taxa_aprovacao as float64) taxa_aprovacao, + safe_cast(taxa_reprovacao as float64) taxa_reprovacao, + safe_cast(taxa_abandono as float64) taxa_abandono, +from `basedosdados-staging.br_inep_educacao_especial_staging.uf_taxa_rendimento` as t diff --git a/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb new 
file mode 100644 index 00000000..8216a77c --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_brasil_distorcao_idade_serie.ipynb @@ -0,0 +1,4579 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 3) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'2007': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 
NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 72.2 60.6 \n", + " 1 Norte 76.1 69.7 \n", + " 2 Nordeste 74.4 64.7 \n", + " 3 Sudeste 69.3 56.5 \n", + " 4 Sul 73.3 59.6 \n", + " 5 Centro-Oeste 78.7 73.6 \n", + " 6 Rondônia 72.8 65.8 \n", + " 7 Acre 67.2 60.0 \n", + " 8 Amazonas 76.3 75.7 \n", + " 9 Roraima 71.0 69.5 \n", + " 10 Pará 78.2 68.4 \n", + " 11 Amapá 76.4 81.0 \n", + " 12 Tocantins 75.4 70.7 \n", + " 13 Maranhão 74.2 61.2 \n", + " 14 Piauí 83.0 69.3 \n", + " 15 Ceará 67.8 56.9 \n", + " 16 Rio Grande do Norte 66.4 74.0 \n", + " 17 Paraíba 73.3 71.6 \n", + " 18 Pernambuco 78.6 64.5 \n", + " 19 Alagoas 72.0 79.2 \n", + " 20 Sergipe 85.0 85.4 \n", + " 21 Bahia 76.7 66.5 \n", + " 22 Minas Gerais 79.0 69.1 \n", + " 23 Espírito Santo 67.5 69.0 \n", + " 24 Rio de Janeiro 85.1 77.2 \n", + " 25 São Paulo 60.7 50.2 \n", + " 26 Paraná 72.3 54.2 \n", + " 27 Santa Catarina 75.8 65.3 \n", + " 28 Rio Grande do Sul 73.6 66.1 \n", + " 29 Mato Grosso do Sul 86.0 81.2 \n", + " 30 Mato Grosso 76.3 66.2 \n", + " 31 Goiás 76.8 75.7 \n", + " 32 Distrito Federal 74.6 70.6 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 65.3 \n", + " 1 75.7 \n", + " 2 77.9 \n", + " 3 58.4 \n", + " 4 62.6 \n", + " 5 73.6 \n", + " 6 76.2 \n", + " 7 60.5 \n", + " 8 75.5 \n", + " 9 76.9 \n", + " 10 77.7 \n", + " 11 69.4 \n", + " 12 75.9 \n", + " 13 76.2 \n", + " 14 80.5 \n", + " 15 83.7 \n", + " 16 78.0 \n", + " 17 88.0 \n", + " 18 81.3 \n", + " 19 88.3 \n", + " 20 85.7 \n", + " 21 67.1 \n", + " 22 69.1 \n", + " 23 69.1 \n", + " 24 86.7 \n", + " 25 51.5 \n", + " 26 59.5 
\n", + " 27 71.9 \n", + " 28 60.4 \n", + " 29 75.4 \n", + " 30 78.1 \n", + " 31 75.5 \n", + " 32 69.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2008': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 66.5 54.5 \n", + " 1 Norte 61.2 44.8 \n", + " 2 Nordeste 60.7 45.7 \n", + " 3 Sudeste 65.7 53.5 \n", + " 4 Sul 73.5 63.2 \n", + " 5 Centro-Oeste 72.3 63.6 \n", + " 6 Rondônia 55.1 35.9 \n", + " 7 Acre 43.8 29.7 \n", + " 8 Amazonas 58.0 57.0 \n", + " 9 Roraima 44.2 28.4 \n", + " 10 Pará 65.1 48.3 \n", + " 11 Amapá 57.0 54.4 \n", + " 12 Tocantins 67.1 46.3 \n", + " 13 Maranhão 62.8 44.3 \n", + " 14 Piauí 66.3 57.9 \n", + " 15 Ceará 55.9 38.6 \n", + " 16 Rio Grande do Norte 47.1 45.2 \n", + " 17 Paraíba 54.0 48.1 \n", + " 18 Pernambuco 73.5 58.6 \n", + " 19 Alagoas 49.0 46.1 \n", + " 20 Sergipe 68.5 58.9 \n", + " 21 Bahia 60.5 44.4 \n", + " 22 Minas Gerais 73.1 61.8 \n", + " 23 Espírito Santo 62.9 61.2 \n", + " 24 Rio de Janeiro 82.7 72.7 \n", + " 25 São Paulo 57.5 48.2 \n", + " 26 Paraná 78.0 60.6 \n", + " 27 Santa Catarina 69.5 64.5 \n", + " 28 Rio Grande do Sul 70.1 65.0 \n", + " 29 Mato Grosso do Sul 84.6 75.7 \n", + " 30 Mato Grosso 72.6 66.2 \n", + " 31 Goiás 68.0 62.1 \n", + " 32 Distrito Federal 62.0 60.0 \n", 
+ " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 57.3 \n", + " 1 50.7 \n", + " 2 49.4 \n", + " 3 56.6 \n", + " 4 65.0 \n", + " 5 60.5 \n", + " 6 48.8 \n", + " 7 47.2 \n", + " 8 62.2 \n", + " 9 41.2 \n", + " 10 48.7 \n", + " 11 60.0 \n", + " 12 48.2 \n", + " 13 45.3 \n", + " 14 51.0 \n", + " 15 48.7 \n", + " 16 44.2 \n", + " 17 52.3 \n", + " 18 65.1 \n", + " 19 61.1 \n", + " 20 65.7 \n", + " 21 41.9 \n", + " 22 58.6 \n", + " 23 73.5 \n", + " 24 80.2 \n", + " 25 51.5 \n", + " 26 59.6 \n", + " 27 72.5 \n", + " 28 64.0 \n", + " 29 80.0 \n", + " 30 72.7 \n", + " 31 55.8 \n", + " 32 60.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2009': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 68.4 54.8 \n", + " 1 Norte 69.8 51.0 \n", + " 2 Nordeste 66.9 50.7 \n", + " 3 Sudeste 65.6 52.1 \n", + " 4 Sul 75.8 63.8 \n", + " 5 Centro-Oeste 73.1 65.6 \n", + " 6 Rondônia 64.2 47.3 \n", + " 7 Acre 51.6 37.8 \n", + " 8 Amazonas 70.6 56.7 \n", + " 9 Roraima 50.6 51.7 \n", + " 10 Pará 74.2 54.0 \n", + " 11 Amapá 64.5 49.2 \n", + " 12 Tocantins 72.8 50.5 \n", + " 13 Maranhão 64.4 48.8 \n", + " 14 Piauí 70.6 47.2 \n", + " 15 Ceará 64.3 44.5 \n", + " 16 Rio Grande do Norte 53.4 57.9 \n", + " 17 Paraíba 70.4 47.9 \n", + " 18 Pernambuco 78.3 64.5 \n", + " 19 Alagoas 54.1 58.9 \n", + " 20 Sergipe 78.2 61.1 \n", + " 21 Bahia 65.5 50.2 \n", + " 22 Minas Gerais 75.4 64.4 \n", + " 23 Espírito Santo 69.9 60.2 \n", + " 24 Rio de Janeiro 84.6 74.3 \n", + " 25 São Paulo 57.0 47.3 \n", + " 26 Paraná 80.6 61.1 \n", + " 27 Santa Catarina 57.4 61.8 \n", + " 28 Rio Grande do Sul 75.0 67.9 \n", + " 29 Mato Grosso do Sul 84.1 78.9 \n", + " 30 Mato Grosso 73.2 69.7 \n", + " 31 Goiás 65.7 61.2 \n", + " 32 Distrito Federal 66.4 64.6 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.0 \n", + " 1 57.5 \n", + " 2 56.6 \n", + " 3 52.1 \n", + " 4 62.1 \n", + " 5 62.2 \n", + " 6 45.1 \n", + " 7 43.4 \n", + " 8 61.4 \n", + " 9 21.4 \n", + " 10 62.1 \n", + " 11 54.7 \n", + " 12 55.2 \n", + " 13 48.2 \n", + " 14 61.4 \n", + " 15 52.6 \n", + " 16 51.1 \n", + " 17 50.2 \n", + " 18 69.3 \n", + " 19 64.1 \n", + " 20 68.9 \n", + " 21 54.4 \n", + " 22 59.9 \n", + " 23 69.9 \n", + " 24 74.8 \n", + " 25 46.0 \n", + " 26 57.4 \n", + " 27 57.4 \n", + " 28 70.3 \n", + " 29 80.6 \n", + " 30 70.9 \n", + " 31 56.2 \n", + " 32 67.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2010': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 67.5 57.2 \n", + " 1 Norte 70.0 59.4 \n", + " 2 Nordeste 67.1 57.8 \n", + " 3 Sudeste 65.0 52.8 \n", + " 4 Sul 72.4 62.8 \n", + " 5 Centro-Oeste 68.6 67.2 \n", + " 6 Rondônia 67.5 60.7 \n", + " 7 Acre 57.2 49.4 \n", + " 8 Amazonas 70.7 64.2 \n", + " 9 Roraima 50.6 59.2 \n", + " 10 Pará 74.9 62.5 \n", + " 11 Amapá 66.7 54.7 \n", + " 12 Tocantins 68.6 57.5 \n", + " 13 Maranhão 65.2 55.5 \n", + " 14 Piauí 69.2 56.1 \n", + " 15 Ceará 64.2 50.7 \n", + " 16 Rio Grande do Norte 56.2 66.8 \n", + " 17 Paraíba 71.7 56.5 \n", + " 18 Pernambuco 74.9 65.6 \n", + " 19 Alagoas 57.6 60.2 \n", + " 20 Sergipe 78.2 72.2 \n", + " 21 Bahia 67.4 59.3 \n", + " 22 Minas Gerais 69.3 66.1 \n", + " 23 Espírito Santo 59.1 59.0 \n", + " 24 Rio de Janeiro 82.2 73.3 \n", + " 25 São Paulo 58.4 47.4 \n", + " 26 Paraná 76.0 59.0 \n", + " 27 Santa Catarina 54.1 62.1 \n", + " 28 Rio Grande do Sul 73.0 67.8 \n", + " 29 Mato Grosso do Sul 81.2 78.7 \n", + " 30 Mato Grosso 64.6 66.7 \n", + " 31 Goiás 62.2 66.2 \n", + " 32 Distrito Federal 62.7 61.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.4 \n", + " 1 63.0 \n", + " 2 60.3 \n", + " 3 52.1 \n", + " 4 55.6 \n", + " 5 64.6 \n", + " 6 45.9 \n", + " 7 56.6 \n", + " 8 70.9 \n", + " 9 58.6 \n", + " 10 70.8 \n", + " 11 54.1 \n", + " 12 56.0 \n", + " 13 56.7 \n", + " 14 65.9 \n", + " 
15 50.6 \n", + " 16 67.4 \n", + " 17 55.6 \n", + " 18 69.9 \n", + " 19 69.1 \n", + " 20 80.1 \n", + " 21 57.6 \n", + " 22 58.9 \n", + " 23 60.1 \n", + " 24 67.7 \n", + " 25 47.3 \n", + " 26 51.1 \n", + " 27 53.0 \n", + " 28 62.9 \n", + " 29 75.6 \n", + " 30 69.6 \n", + " 31 63.2 \n", + " 32 61.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2011': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 66.5 58.5 \n", + " 1 Norte 69.1 63.2 \n", + " 2 Nordeste 66.9 61.9 \n", + " 3 Sudeste 64.7 53.6 \n", + " 4 Sul 69.1 60.6 \n", + " 5 Centro-Oeste 66.1 66.2 \n", + " 6 Rondônia 67.5 61.4 \n", + " 7 Acre 57.1 53.4 \n", + " 8 Amazonas 69.7 69.5 \n", + " 9 Roraima 49.4 65.6 \n", + " 10 Pará 73.4 66.5 \n", + " 11 Amapá 69.8 55.4 \n", + " 12 Tocantins 67.2 62.8 \n", + " 13 Maranhão 64.8 60.2 \n", + " 14 Piauí 67.1 61.0 \n", + " 15 Ceará 62.1 56.8 \n", + " 16 Rio Grande do Norte 57.6 68.9 \n", + " 17 Paraíba 71.2 60.8 \n", + " 18 Pernambuco 74.3 67.4 \n", + " 19 Alagoas 60.4 64.5 \n", + " 20 Sergipe 78.4 73.3 \n", + " 21 Bahia 67.7 61.6 \n", + " 22 Minas Gerais 65.1 65.3 \n", + " 23 Espírito Santo 54.4 58.0 \n", + " 24 Rio de Janeiro 79.6 72.8 \n", + " 25 São Paulo 60.4 47.5 \n", + " 26 Paraná 73.5 56.1 \n", + " 27 Santa Catarina 50.3 60.1 \n", + " 28 Rio Grande do Sul 69.8 66.4 \n", + " 29 Mato Grosso do Sul 78.4 79.3 \n", + " 30 Mato Grosso 58.8 62.5 \n", + " 31 Goiás 60.5 65.1 \n", + " 32 Distrito Federal 63.0 61.0 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.1 \n", + " 1 62.9 \n", + " 2 59.9 \n", + " 3 52.5 \n", + " 4 54.3 \n", + " 5 62.5 \n", + " 6 51.0 \n", + " 7 51.2 \n", + " 8 68.9 \n", + " 9 48.1 \n", + " 10 69.1 \n", + " 11 57.7 \n", + " 12 62.9 \n", + " 13 54.6 \n", + " 14 63.0 \n", + " 15 57.7 \n", + " 16 68.8 \n", + " 17 50.5 \n", + " 18 65.9 \n", + " 19 62.8 \n", + " 20 73.5 \n", + " 21 57.1 \n", + " 22 59.8 \n", + " 23 59.1 \n", + " 24 66.3 \n", + " 25 47.4 \n", + " 26 50.7 \n", + " 27 51.3 \n", + " 28 61.2 \n", + " 29 68.9 \n", + " 30 64.6 \n", + " 31 63.8 \n", + " 32 57.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2012': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 64.4 58.9 \n", + " 1 Norte 67.0 64.7 \n", + " 2 Nordeste 66.2 63.7 \n", + " 3 Sudeste 62.0 53.1 \n", + " 4 Sul 67.1 61.4 \n", + " 5 Centro-Oeste 62.5 64.3 \n", + " 6 Rondônia 67.0 60.8 \n", + " 7 Acre 54.5 58.5 \n", + " 8 Amazonas 66.0 72.1 \n", + " 9 Roraima 43.5 62.5 \n", + " 10 Pará 70.2 67.7 \n", + " 11 Amapá 68.3 61.9 \n", + " 12 Tocantins 68.3 62.9 \n", + " 13 Maranhão 62.6 60.3 \n", + " 14 Piauí 68.5 61.3 \n", + " 15 Ceará 57.8 59.4 \n", + " 16 Rio Grande do Norte 57.6 69.9 \n", + " 17 Paraíba 69.3 61.9 \n", + " 18 Pernambuco 74.2 69.6 \n", + " 19 Alagoas 61.7 66.2 \n", + " 20 Sergipe 79.5 75.4 \n", + " 21 Bahia 68.0 64.0 \n", + " 22 Minas Gerais 57.8 63.3 \n", + " 23 Espírito Santo 50.4 59.4 \n", + " 24 Rio de Janeiro 78.8 71.8 \n", + " 25 São Paulo 59.6 46.6 \n", + " 26 Paraná 73.8 56.9 \n", + " 27 Santa Catarina 41.5 59.9 \n", + " 28 Rio Grande do Sul 67.0 67.6 \n", + " 29 Mato Grosso do Sul 77.2 78.6 \n", + " 30 Mato Grosso 47.1 57.6 \n", + " 31 Goiás 56.6 63.0 \n", + " 32 Distrito Federal 62.6 59.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 55.3 \n", + " 1 62.9 \n", + " 2 60.9 \n", + " 3 51.4 \n", + " 4 51.8 \n", + " 5 61.5 \n", + " 6 53.9 \n", + " 7 52.0 \n", + " 8 66.0 \n", + " 9 57.5 \n", + " 10 70.9 \n", + " 11 52.5 \n", + " 12 61.0 \n", + " 13 57.8 \n", + " 14 58.3 \n", + " 
15 59.0 \n", + " 16 67.7 \n", + " 17 52.1 \n", + " 18 67.1 \n", + " 19 63.5 \n", + " 20 73.7 \n", + " 21 59.8 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 60.5 \n", + " 25 46.6 \n", + " 26 46.9 \n", + " 27 49.9 \n", + " 28 59.4 \n", + " 29 69.8 \n", + " 30 57.5 \n", + " 31 64.7 \n", + " 32 53.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2013': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 61.8 59.3 \n", + " 1 Norte 65.1 65.2 \n", + " 2 Nordeste 63.8 66.2 \n", + " 3 Sudeste 59.5 52.2 \n", + " 4 Sul 63.0 63.8 \n", + " 5 Centro-Oeste 60.1 61.6 \n", + " 6 Rondônia 66.3 62.2 \n", + " 7 Acre 55.8 56.4 \n", + " 8 Amazonas 63.2 66.0 \n", + " 9 Roraima 38.4 58.2 \n", + " 10 Pará 69.1 69.9 \n", + " 11 Amapá 64.4 60.6 \n", + " 12 Tocantins 63.3 66.0 \n", + " 13 Maranhão 60.5 61.9 \n", + " 14 Piauí 66.0 66.3 \n", + " 15 Ceará 50.2 63.1 \n", + " 16 Rio Grande do Norte 56.4 72.8 \n", + " 17 Paraíba 68.0 67.2 \n", + " 18 Pernambuco 73.1 71.1 \n", + " 19 Alagoas 61.3 65.7 \n", + " 20 Sergipe 76.4 77.6 \n", + " 21 Bahia 66.8 65.8 \n", + " 22 Minas Gerais 52.4 60.5 \n", + " 23 Espírito Santo 47.8 61.8 \n", + " 24 Rio de Janeiro 77.1 74.3 \n", + " 25 São Paulo 58.4 44.8 \n", + " 26 Paraná 70.7 59.6 \n", + " 27 Santa Catarina 35.1 59.7 \n", + " 28 Rio Grande do Sul 62.8 70.3 \n", + " 29 Mato Grosso do Sul 76.4 75.2 \n", + " 30 Mato Grosso 41.9 47.7 \n", + " 31 Goiás 53.5 64.7 \n", + " 32 Distrito Federal 63.5 58.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 55.4 \n", + " 1 62.0 \n", + " 2 61.9 \n", + " 3 51.0 \n", + " 4 52.9 \n", + " 5 61.4 \n", + " 6 54.1 \n", + " 7 48.6 \n", + " 8 63.9 \n", + " 9 60.3 \n", + " 10 70.6 \n", + " 11 54.4 \n", + " 12 60.4 \n", + " 13 57.8 \n", + " 14 63.2 \n", + " 15 60.9 \n", + " 16 65.5 \n", + " 17 56.9 \n", + " 18 71.0 \n", + " 19 55.5 \n", + " 20 72.9 \n", + " 21 60.0 \n", + " 22 61.1 \n", + " 23 51.7 \n", + " 24 63.1 \n", + " 25 46.1 \n", + " 26 48.2 \n", + " 27 51.8 \n", + " 28 59.4 \n", + " 29 67.8 \n", + " 30 60.5 \n", + " 31 65.1 \n", + " 32 53.9 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2014': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 58.7 59.6 \n", + " 1 Norte 60.7 65.6 \n", + " 2 Nordeste 60.4 68.1 \n", + " 3 Sudeste 56.4 51.9 \n", + " 4 Sul 61.2 64.5 \n", + " 5 Centro-Oeste 56.0 58.6 \n", + " 6 Rondônia 57.7 63.7 \n", + " 7 Acre 53.6 55.5 \n", + " 8 Amazonas 61.6 67.2 \n", + " 9 Roraima 35.5 54.8 \n", + " 10 Pará 64.8 72.0 \n", + " 11 Amapá 60.4 60.3 \n", + " 12 Tocantins 56.9 65.2 \n", + " 13 Maranhão 59.0 63.6 \n", + " 14 Piauí 62.1 68.2 \n", + " 15 Ceará 42.5 63.9 \n", + " 16 Rio Grande do Norte 52.5 74.8 \n", + " 17 Paraíba 65.0 69.4 \n", + " 18 Pernambuco 71.2 72.7 \n", + " 19 Alagoas 60.1 69.4 \n", + " 20 Sergipe 71.9 81.3 \n", + " 21 Bahia 64.4 67.9 \n", + " 22 Minas Gerais 46.0 56.6 \n", + " 23 Espírito Santo 45.6 63.0 \n", + " 24 Rio de Janeiro 75.3 73.7 \n", + " 25 São Paulo 56.1 44.9 \n", + " 26 Paraná 73.2 61.2 \n", + " 27 Santa Catarina 30.0 57.5 \n", + " 28 Rio Grande do Sul 58.9 70.9 \n", + " 29 Mato Grosso do Sul 75.1 73.4 \n", + " 30 Mato Grosso 35.0 37.8 \n", + " 31 Goiás 46.7 62.0 \n", + " 32 Distrito Federal 62.9 61.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 54.5 \n", + " 1 58.9 \n", + " 2 64.7 \n", + " 3 49.3 \n", + " 4 52.4 \n", + " 5 61.0 \n", + " 6 52.9 \n", + " 7 49.4 \n", + " 8 67.0 \n", + " 9 64.4 \n", + " 10 63.8 \n", + " 11 57.8 \n", + " 12 60.8 \n", + " 13 61.1 \n", + " 14 63.6 \n", + " 
15 62.2 \n", + " 16 69.0 \n", + " 17 60.3 \n", + " 18 68.6 \n", + " 19 61.3 \n", + " 20 74.1 \n", + " 21 67.8 \n", + " 22 60.5 \n", + " 23 53.9 \n", + " 24 63.1 \n", + " 25 43.5 \n", + " 26 48.4 \n", + " 27 50.4 \n", + " 28 59.0 \n", + " 29 66.4 \n", + " 30 58.7 \n", + " 31 64.4 \n", + " 32 54.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2015': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 55.8 59.2 \n", + " 1 Norte 57.8 66.0 \n", + " 2 Nordeste 57.2 67.6 \n", + " 3 Sudeste 53.8 51.1 \n", + " 4 Sul 57.6 63.6 \n", + " 5 Centro-Oeste 53.3 56.2 \n", + " 6 Rondônia 53.0 62.4 \n", + " 7 Acre 50.5 54.1 \n", + " 8 Amazonas 59.2 65.1 \n", + " 9 Roraima 32.1 54.7 \n", + " 10 Pará 62.8 73.7 \n", + " 11 Amapá 55.7 61.7 \n", + " 12 Tocantins 53.1 64.9 \n", + " 13 Maranhão 55.3 63.7 \n", + " 14 Piauí 60.1 67.5 \n", + " 15 Ceará 36.3 60.4 \n", + " 16 Rio Grande do Norte 49.0 74.2 \n", + " 17 Paraíba 62.9 70.4 \n", + " 18 Pernambuco 69.3 73.1 \n", + " 19 Alagoas 58.1 69.9 \n", + " 20 Sergipe 68.2 81.0 \n", + " 21 Bahia 62.7 68.6 \n", + " 22 Minas Gerais 41.7 53.1 \n", + " 23 Espírito Santo 43.0 62.6 \n", + " 24 Rio de Janeiro 72.9 72.3 \n", + " 25 São Paulo 54.5 44.4 \n", + " 26 Paraná 69.7 61.2 \n", + " 27 Santa Catarina 26.5 54.1 \n", + " 28 Rio Grande do Sul 56.0 70.5 \n", + " 29 Mato Grosso do Sul 73.1 69.4 \n", + " 30 Mato Grosso 30.8 34.0 \n", + " 31 Goiás 42.7 60.6 \n", + " 32 Distrito Federal 62.1 59.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.7 \n", + " 1 58.4 \n", + " 2 64.4 \n", + " 3 48.3 \n", + " 4 52.0 \n", + " 5 58.3 \n", + " 6 53.3 \n", + " 7 49.6 \n", + " 8 68.1 \n", + " 9 54.5 \n", + " 10 65.6 \n", + " 11 55.5 \n", + " 12 58.9 \n", + " 13 61.0 \n", + " 14 65.6 \n", + " 15 62.6 \n", + " 16 69.1 \n", + " 17 60.4 \n", + " 18 66.0 \n", + " 19 57.8 \n", + " 20 73.7 \n", + " 21 67.6 \n", + " 22 60.9 \n", + " 23 54.2 \n", + " 24 62.0 \n", + " 25 41.7 \n", + " 26 48.0 \n", + " 27 50.1 \n", + " 28 58.0 \n", + " 29 67.2 \n", + " 30 52.3 \n", + " 31 61.6 \n", + " 32 52.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2016': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 53.5 58.2 \n", + " 1 Norte 55.7 65.6 \n", + " 2 Nordeste 54.3 66.9 \n", + " 3 Sudeste 51.4 49.7 \n", + " 4 Sul 56.9 61.2 \n", + " 5 Centro-Oeste 51.0 55.3 \n", + " 6 Rondônia 47.2 62.7 \n", + " 7 Acre 46.8 55.5 \n", + " 8 Amazonas 57.7 63.3 \n", + " 9 Roraima 28.9 52.9 \n", + " 10 Pará 61.3 73.2 \n", + " 11 Amapá 54.4 62.0 \n", + " 12 Tocantins 52.5 62.7 \n", + " 13 Maranhão 53.1 64.2 \n", + " 14 Piauí 57.3 67.5 \n", + " 15 Ceará 30.9 55.6 \n", + " 16 Rio Grande do Norte 45.4 72.9 \n", + " 17 Paraíba 60.3 71.4 \n", + " 18 Pernambuco 65.8 74.0 \n", + " 19 Alagoas 56.1 70.7 \n", + " 20 Sergipe 66.1 79.4 \n", + " 21 Bahia 61.4 69.9 \n", + " 22 Minas Gerais 39.3 50.9 \n", + " 23 Espírito Santo 40.7 59.8 \n", + " 24 Rio de Janeiro 70.1 70.2 \n", + " 25 São Paulo 51.9 43.3 \n", + " 26 Paraná 69.5 59.3 \n", + " 27 Santa Catarina 24.5 47.9 \n", + " 28 Rio Grande do Sul 54.9 68.4 \n", + " 29 Mato Grosso do Sul 71.9 67.3 \n", + " 30 Mato Grosso 30.0 31.1 \n", + " 31 Goiás 40.0 60.1 \n", + " 32 Distrito Federal 59.7 59.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.9 \n", + " 1 60.4 \n", + " 2 64.6 \n", + " 3 47.8 \n", + " 4 53.6 \n", + " 5 57.3 \n", + " 6 54.4 \n", + " 7 51.1 \n", + " 8 67.3 \n", + " 9 51.1 \n", + " 10 69.7 \n", + " 11 55.5 \n", + " 12 60.9 \n", + " 13 58.7 \n", + " 14 63.5 \n", + " 
15 64.1 \n", + " 16 68.3 \n", + " 17 65.5 \n", + " 18 65.4 \n", + " 19 64.6 \n", + " 20 72.0 \n", + " 21 68.0 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 58.3 \n", + " 25 41.2 \n", + " 26 49.1 \n", + " 27 50.7 \n", + " 28 61.0 \n", + " 29 68.0 \n", + " 30 49.8 \n", + " 31 61.6 \n", + " 32 50.8 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2017': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 51.4 56.8 \n", + " 1 Norte 53.1 64.8 \n", + " 2 Nordeste 52.2 65.3 \n", + " 3 Sudeste 49.4 48.3 \n", + " 4 Sul 54.6 59.2 \n", + " 5 Centro-Oeste 48.2 52.5 \n", + " 6 Rondônia 42.7 60.5 \n", + " 7 Acre 45.1 54.0 \n", + " 8 Amazonas 54.5 62.9 \n", + " 9 Roraima 26.2 50.3 \n", + " 10 Pará 59.9 72.7 \n", + " 11 Amapá 53.8 60.4 \n", + " 12 Tocantins 44.1 61.6 \n", + " 13 Maranhão 50.5 63.3 \n", + " 14 Piauí 54.4 63.7 \n", + " 15 Ceará 28.4 50.8 \n", + " 16 Rio Grande do Norte 43.2 69.6 \n", + " 17 Paraíba 59.1 71.3 \n", + " 18 Pernambuco 62.6 73.2 \n", + " 19 Alagoas 55.1 69.8 \n", + " 20 Sergipe 62.3 78.4 \n", + " 21 Bahia 59.6 70.4 \n", + " 22 Minas Gerais 36.8 49.4 \n", + " 23 Espírito Santo 37.9 58.2 \n", + " 24 Rio de Janeiro 68.0 67.8 \n", + " 25 São Paulo 50.8 41.2 \n", + " 26 Paraná 66.0 58.0 \n", + " 27 Santa Catarina 23.0 44.4 \n", + " 28 Rio Grande do Sul 53.4 66.9 \n", + " 29 Mato Grosso do Sul 71.5 68.3 \n", + " 30 Mato Grosso 29.3 33.3 \n", + " 31 Goiás 37.3 54.6 \n", + " 32 Distrito Federal 56.8 56.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.1 \n", + " 1 59.9 \n", + " 2 61.8 \n", + " 3 46.7 \n", + " 4 54.9 \n", + " 5 55.5 \n", + " 6 52.2 \n", + " 7 48.1 \n", + " 8 64.1 \n", + " 9 51.2 \n", + " 10 69.8 \n", + " 11 54.6 \n", + " 12 59.5 \n", + " 13 56.2 \n", + " 14 59.9 \n", + " 15 60.8 \n", + " 16 67.9 \n", + " 17 66.9 \n", + " 18 65.2 \n", + " 19 52.6 \n", + " 20 75.2 \n", + " 21 65.7 \n", + " 22 57.4 \n", + " 23 51.5 \n", + " 24 58.6 \n", + " 25 39.6 \n", + " 26 51.2 \n", + " 27 50.5 \n", + " 28 62.0 \n", + " 29 65.6 \n", + " 30 46.4 \n", + " 31 60.5 \n", + " 32 50.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2018': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 48.4 55.0 \n", + " 1 Norte 49.9 62.6 \n", + " 2 Nordeste 48.9 63.4 \n", + " 3 Sudeste 46.6 46.8 \n", + " 4 Sul 52.5 57.0 \n", + " 5 Centro-Oeste 44.1 49.8 \n", + " 6 Rondônia 36.6 58.4 \n", + " 7 Acre 41.4 51.5 \n", + " 8 Amazonas 51.6 60.7 \n", + " 9 Roraima 25.2 45.0 \n", + " 10 Pará 57.7 70.7 \n", + " 11 Amapá 51.3 60.2 \n", + " 12 Tocantins 38.7 57.7 \n", + " 13 Maranhão 47.7 61.2 \n", + " 14 Piauí 50.9 62.2 \n", + " 15 Ceará 24.6 46.0 \n", + " 16 Rio Grande do Norte 41.0 67.5 \n", + " 17 Paraíba 57.0 70.7 \n", + " 18 Pernambuco 57.0 70.8 \n", + " 19 Alagoas 50.1 68.2 \n", + " 20 Sergipe 60.8 76.4 \n", + " 21 Bahia 56.8 69.9 \n", + " 22 Minas Gerais 34.0 48.1 \n", + " 23 Espírito Santo 34.5 55.4 \n", + " 24 Rio de Janeiro 64.7 66.6 \n", + " 25 São Paulo 48.3 38.8 \n", + " 26 Paraná 63.0 55.2 \n", + " 27 Santa Catarina 21.7 41.4 \n", + " 28 Rio Grande do Sul 51.9 65.4 \n", + " 29 Mato Grosso do Sul 67.4 67.6 \n", + " 30 Mato Grosso 27.9 34.6 \n", + " 31 Goiás 32.9 50.0 \n", + " 32 Distrito Federal 54.3 53.8 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.2 \n", + " 1 61.0 \n", + " 2 61.2 \n", + " 3 47.0 \n", + " 4 54.1 \n", + " 5 53.2 \n", + " 6 51.2 \n", + " 7 48.4 \n", + " 8 61.4 \n", + " 9 52.5 \n", + " 10 70.6 \n", + " 11 56.1 \n", + " 12 58.8 \n", + " 13 56.3 \n", + " 14 55.7 \n", + " 
15 57.4 \n", + " 16 67.7 \n", + " 17 66.1 \n", + " 18 63.2 \n", + " 19 60.0 \n", + " 20 75.8 \n", + " 21 63.9 \n", + " 22 55.4 \n", + " 23 52.4 \n", + " 24 60.3 \n", + " 25 38.8 \n", + " 26 49.9 \n", + " 27 47.4 \n", + " 28 63.0 \n", + " 29 62.6 \n", + " 30 43.8 \n", + " 31 58.2 \n", + " 32 47.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2019': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 44.9 53.3 \n", + " 1 Norte 46.8 60.9 \n", + " 2 Nordeste 45.1 61.3 \n", + " 3 Sudeste 42.6 45.2 \n", + " 4 Sul 50.1 55.0 \n", + " 5 Centro-Oeste 41.1 48.3 \n", + " 6 Rondônia 30.9 52.9 \n", + " 7 Acre 36.9 49.6 \n", + " 8 Amazonas 49.0 59.5 \n", + " 9 Roraima 24.8 43.4 \n", + " 10 Pará 55.1 69.7 \n", + " 11 Amapá 46.4 59.8 \n", + " 12 Tocantins 35.5 55.8 \n", + " 13 Maranhão 44.7 59.9 \n", + " 14 Piauí 45.6 62.2 \n", + " 15 Ceará 21.6 40.5 \n", + " 16 Rio Grande do Norte 37.6 64.8 \n", + " 17 Paraíba 53.8 70.6 \n", + " 18 Pernambuco 51.2 68.4 \n", + " 19 Alagoas 45.0 65.0 \n", + " 20 Sergipe 57.4 75.1 \n", + " 21 Bahia 53.5 69.5 \n", + " 22 Minas Gerais 30.7 45.6 \n", + " 23 Espírito Santo 30.8 53.4 \n", + " 24 Rio de Janeiro 60.4 65.9 \n", + " 25 São Paulo 43.9 37.5 \n", + " 26 Paraná 61.6 51.5 \n", + " 27 Santa Catarina 20.0 38.7 \n", + " 28 Rio Grande do Sul 49.1 64.2 \n", + " 29 Mato Grosso do Sul 65.0 66.2 \n", + " 30 Mato Grosso 27.5 33.9 \n", + " 31 Goiás 30.2 47.7 \n", + " 32 Distrito Federal 44.8 50.5 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 51.1 \n", + " 1 59.3 \n", + " 2 59.4 \n", + " 3 44.7 \n", + " 4 50.9 \n", + " 5 51.4 \n", + " 6 48.3 \n", + " 7 46.7 \n", + " 8 62.3 \n", + " 9 47.7 \n", + " 10 68.5 \n", + " 11 51.7 \n", + " 12 56.8 \n", + " 13 54.8 \n", + " 14 55.8 \n", + " 15 52.0 \n", + " 16 66.1 \n", + " 17 64.5 \n", + " 18 65.5 \n", + " 19 59.3 \n", + " 20 74.6 \n", + " 21 63.1 \n", + " 22 50.7 \n", + " 23 50.2 \n", + " 24 59.5 \n", + " 25 36.9 \n", + " 26 45.2 \n", + " 27 43.0 \n", + " 28 59.4 \n", + " 29 59.2 \n", + " 30 43.9 \n", + " 31 56.0 \n", + " 32 45.1 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2020': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 41.7 51.6 \n", + " 1 Norte 44.0 58.8 \n", + " 2 Nordeste 40.9 59.3 \n", + " 3 Sudeste 39.3 43.8 \n", + " 4 Sul 48.2 53.7 \n", + " 5 Centro-Oeste 38.1 45.4 \n", + " 6 Rondônia 29.2 47.8 \n", + " 7 Acre 34.9 47.4 \n", + " 8 Amazonas 45.9 57.8 \n", + " 9 Roraima 25.2 37.0 \n", + " 10 Pará 52.0 68.6 \n", + " 11 Amapá 41.1 59.7 \n", + " 12 Tocantins 32.2 52.9 \n", + " 13 Maranhão 41.2 58.6 \n", + " 14 Piauí 39.8 58.2 \n", + " 15 Ceará 18.4 37.1 \n", + " 16 Rio Grande do Norte 33.4 62.5 \n", + " 17 Paraíba 50.3 68.4 \n", + " 18 Pernambuco 43.5 65.2 \n", + " 19 Alagoas 39.7 61.2 \n", + " 20 Sergipe 53.9 74.8 \n", + " 21 Bahia 50.6 68.8 \n", + " 22 Minas Gerais 28.2 44.7 \n", + " 23 Espírito Santo 27.9 51.2 \n", + " 24 Rio de Janeiro 55.4 64.5 \n", + " 25 São Paulo 40.7 35.4 \n", + " 26 Paraná 61.5 49.7 \n", + " 27 Santa Catarina 18.7 37.4 \n", + " 28 Rio Grande do Sul 46.0 63.8 \n", + " 29 Mato Grosso do Sul 61.7 65.3 \n", + " 30 Mato Grosso 24.5 35.0 \n", + " 31 Goiás 29.2 42.5 \n", + " 32 Distrito Federal 39.9 49.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 50.3 \n", + " 1 58.5 \n", + " 2 58.5 \n", + " 3 44.2 \n", + " 4 48.8 \n", + " 5 49.4 \n", + " 6 49.1 \n", + " 7 46.7 \n", + " 8 58.6 \n", + " 9 48.3 \n", + " 10 67.6 \n", + " 11 52.2 \n", + " 12 54.8 \n", + " 13 52.4 \n", + " 14 55.8 \n", + " 
15 49.2 \n", + " 16 62.7 \n", + " 17 62.1 \n", + " 18 63.7 \n", + " 19 58.8 \n", + " 20 71.3 \n", + " 21 64.9 \n", + " 22 49.6 \n", + " 23 49.4 \n", + " 24 60.4 \n", + " 25 35.7 \n", + " 26 44.3 \n", + " 27 40.0 \n", + " 28 58.0 \n", + " 29 59.3 \n", + " 30 42.7 \n", + " 31 51.9 \n", + " 32 44.3 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2021': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 36.6 49.5 \n", + " 1 Norte 36.8 57.7 \n", + " 2 Nordeste 34.2 56.9 \n", + " 3 Sudeste 36.2 41.1 \n", + " 4 Sul 43.1 51.1 \n", + " 5 Centro-Oeste 33.6 43.3 \n", + " 6 Rondônia 23.4 43.6 \n", + " 7 Acre 27.9 46.2 \n", + " 8 Amazonas 39.8 56.7 \n", + " 9 Roraima 20.5 35.8 \n", + " 10 Pará 43.4 68.1 \n", + " 11 Amapá 34.0 58.8 \n", + " 12 Tocantins 27.0 48.7 \n", + " 13 Maranhão 35.3 56.5 \n", + " 14 Piauí 33.2 56.3 \n", + " 15 Ceará 14.6 33.3 \n", + " 16 Rio Grande do Norte 23.6 60.1 \n", + " 17 Paraíba 42.4 65.7 \n", + " 18 Pernambuco 36.4 60.8 \n", + " 19 Alagoas 32.2 57.4 \n", + " 20 Sergipe 44.5 72.2 \n", + " 21 Bahia 42.9 66.6 \n", + " 22 Minas Gerais 25.0 40.4 \n", + " 23 Espírito Santo 20.4 47.6 \n", + " 24 Rio de Janeiro 49.3 63.4 \n", + " 25 São Paulo 38.8 32.6 \n", + " 26 Paraná 60.2 46.1 \n", + " 27 Santa Catarina 14.6 35.1 \n", + " 28 Rio Grande do Sul 37.6 61.6 \n", + " 29 Mato Grosso do Sul 56.0 63.4 \n", + " 30 Mato Grosso 21.1 32.3 \n", + " 31 Goiás 25.2 39.8 \n", + " 32 Distrito Federal 36.0 48.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 48.3 \n", + " 1 57.5 \n", + " 2 57.8 \n", + " 3 41.8 \n", + " 4 44.9 \n", + " 5 46.1 \n", + " 6 48.7 \n", + " 7 44.4 \n", + " 8 55.9 \n", + " 9 43.7 \n", + " 10 66.7 \n", + " 11 54.5 \n", + " 12 52.9 \n", + " 13 50.8 \n", + " 14 55.3 \n", + " 15 44.4 \n", + " 16 59.9 \n", + " 17 61.8 \n", + " 18 61.3 \n", + " 19 58.9 \n", + " 20 68.7 \n", + " 21 68.6 \n", + " 22 45.1 \n", + " 23 47.3 \n", + " 24 57.8 \n", + " 25 34.9 \n", + " 26 40.6 \n", + " 27 37.1 \n", + " 28 53.2 \n", + " 29 57.9 \n", + " 30 40.7 \n", + " 31 46.5 \n", + " 32 43.6 \n", + " 33 NaN \n", + " 34 NaN }" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: 2007\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2008\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2009\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2010\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2011\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2012\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2013\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2014\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2015\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2016\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " 
dtype='object')\n", + "\n", + "Sheet: 2017\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2018\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2019\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2020\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2021\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = {}\n", + "\n", + "for table_name, columns in dfs.items():\n", + " df = pd.DataFrame(columns) # Create DataFrame for each table\n", + " dataframes[table_name] = df # Store the DataFrame in a dictionary\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'Unnamed: 1':'id_uf',\n", + " 'Unnamed: 2':'nome',\n", + " 'Distorção Idade-Série': 'Ensino Fundamental – Anos Iniciais',\n", + " 'Distorção Idade-Série.1':'Ensino Fundamental – Anos Finais',\n", + " 'Distorção Idade-Série.2':'Ensino Médio Regular' \n", + "}" + ] + }, + { + 
"cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"Unnamed\")\n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'2007': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 72.2 60.6 \n", + " 1 76.1 69.7 \n", + " 2 74.4 64.7 \n", + " 3 69.3 56.5 \n", + " 4 73.3 59.6 \n", + " 5 78.7 73.6 \n", + " 6 72.8 65.8 \n", + " 7 67.2 60.0 \n", + " 8 76.3 75.7 \n", + " 9 71.0 69.5 \n", + " 10 78.2 68.4 \n", + " 11 76.4 81.0 \n", + " 12 75.4 70.7 \n", + " 13 74.2 61.2 \n", + " 14 83.0 69.3 \n", + " 15 67.8 56.9 \n", + " 16 66.4 74.0 \n", + " 17 73.3 71.6 \n", + " 18 78.6 64.5 \n", + " 19 72.0 79.2 \n", + " 20 85.0 85.4 \n", + " 21 76.7 66.5 \n", + " 22 79.0 69.1 \n", + " 23 67.5 69.0 \n", + " 24 85.1 77.2 \n", + " 25 60.7 50.2 \n", + " 26 72.3 54.2 \n", + " 27 75.8 65.3 \n", + " 28 73.6 66.1 \n", + " 29 86.0 81.2 \n", + " 30 76.3 66.2 \n", + " 31 76.8 75.7 \n", + " 32 74.6 70.6 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 65.3 \n", + " 1 75.7 \n", + " 2 77.9 \n", + " 3 58.4 \n", + " 4 62.6 \n", + " 5 73.6 \n", + " 6 76.2 \n", + " 7 60.5 \n", + " 8 75.5 \n", + " 9 76.9 \n", + " 10 77.7 \n", + " 11 69.4 \n", + " 12 75.9 \n", + " 13 76.2 \n", + " 14 80.5 \n", + " 15 83.7 \n", + " 16 78.0 \n", + " 17 88.0 \n", + " 18 81.3 \n", + " 19 88.3 \n", + " 20 85.7 \n", + " 21 67.1 \n", + " 22 69.1 \n", + " 23 69.1 \n", + " 24 86.7 \n", + " 25 51.5 \n", + " 26 59.5 \n", + " 27 71.9 \n", + " 28 60.4 \n", + " 29 75.4 \n", + " 30 78.1 \n", + " 31 75.5 \n", + " 32 69.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2008': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 66.5 54.5 \n", + " 1 61.2 44.8 \n", + " 2 60.7 45.7 \n", + " 3 65.7 53.5 \n", + " 4 73.5 63.2 \n", + " 5 72.3 63.6 \n", + " 6 55.1 35.9 \n", + " 7 43.8 29.7 \n", + " 8 58.0 57.0 \n", + " 9 44.2 28.4 \n", + " 10 65.1 48.3 \n", + " 11 57.0 54.4 \n", + " 12 67.1 46.3 \n", + " 13 62.8 44.3 \n", + " 14 66.3 57.9 \n", + " 15 55.9 38.6 \n", + " 16 47.1 45.2 \n", + " 17 54.0 48.1 \n", + " 18 73.5 58.6 \n", + " 19 49.0 46.1 \n", + " 20 68.5 58.9 \n", + " 21 60.5 44.4 \n", + " 22 73.1 61.8 \n", + " 23 62.9 61.2 \n", + " 24 82.7 72.7 \n", + " 25 57.5 48.2 \n", + " 26 78.0 60.6 \n", + " 27 69.5 64.5 \n", + " 28 70.1 65.0 \n", + " 29 84.6 75.7 \n", + " 30 72.6 66.2 \n", + " 31 68.0 62.1 \n", + " 32 62.0 60.0 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 57.3 \n", + " 1 50.7 \n", + " 2 49.4 \n", + " 3 56.6 \n", + " 4 65.0 \n", + " 5 60.5 \n", + " 6 48.8 \n", + " 7 47.2 \n", + " 8 62.2 \n", + " 9 41.2 \n", + " 10 48.7 \n", + " 11 60.0 \n", + " 12 48.2 \n", + " 13 45.3 \n", + " 14 51.0 \n", + " 15 48.7 \n", + " 16 44.2 \n", + " 17 52.3 \n", + " 18 65.1 \n", + " 19 61.1 \n", + " 20 65.7 \n", + " 21 41.9 \n", + " 22 58.6 \n", + " 23 73.5 \n", + " 24 80.2 \n", + " 25 51.5 \n", + " 26 59.6 \n", + " 27 72.5 \n", + " 28 64.0 \n", + " 29 80.0 \n", + " 30 72.7 \n", + " 31 55.8 \n", + " 32 60.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2009': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 68.4 54.8 \n", + " 1 69.8 51.0 \n", + " 2 66.9 50.7 \n", + " 3 65.6 52.1 \n", + " 4 75.8 63.8 \n", + " 5 73.1 65.6 \n", + " 6 64.2 47.3 \n", + " 7 51.6 37.8 \n", + " 8 70.6 56.7 \n", + " 9 50.6 51.7 \n", + " 10 74.2 54.0 \n", + " 11 64.5 49.2 \n", + " 12 72.8 50.5 \n", + " 13 64.4 48.8 \n", + " 14 70.6 47.2 \n", + " 15 64.3 44.5 \n", + " 16 53.4 57.9 \n", + " 17 70.4 47.9 \n", + " 18 78.3 64.5 \n", + " 19 54.1 58.9 \n", + " 20 78.2 61.1 \n", + " 21 65.5 50.2 \n", + " 22 75.4 64.4 \n", + " 23 69.9 60.2 \n", + " 24 84.6 74.3 \n", + " 25 57.0 47.3 \n", + " 26 80.6 61.1 \n", + " 27 57.4 61.8 \n", + " 28 75.0 67.9 \n", + " 29 84.1 78.9 \n", + " 30 73.2 69.7 \n", + " 31 65.7 61.2 \n", + " 32 66.4 64.6 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.0 \n", + " 1 57.5 \n", + " 2 56.6 \n", + " 3 52.1 \n", + " 4 62.1 \n", + " 5 62.2 \n", + " 6 45.1 \n", + " 7 43.4 \n", + " 8 61.4 \n", + " 9 21.4 \n", + " 10 62.1 \n", + " 11 54.7 \n", + " 12 55.2 \n", + " 13 48.2 \n", + " 14 61.4 \n", + " 15 52.6 \n", + " 16 51.1 \n", + " 17 50.2 \n", + " 18 69.3 \n", + " 19 
64.1 \n", + " 20 68.9 \n", + " 21 54.4 \n", + " 22 59.9 \n", + " 23 69.9 \n", + " 24 74.8 \n", + " 25 46.0 \n", + " 26 57.4 \n", + " 27 57.4 \n", + " 28 70.3 \n", + " 29 80.6 \n", + " 30 70.9 \n", + " 31 56.2 \n", + " 32 67.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2010': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 67.5 57.2 \n", + " 1 70.0 59.4 \n", + " 2 67.1 57.8 \n", + " 3 65.0 52.8 \n", + " 4 72.4 62.8 \n", + " 5 68.6 67.2 \n", + " 6 67.5 60.7 \n", + " 7 57.2 49.4 \n", + " 8 70.7 64.2 \n", + " 9 50.6 59.2 \n", + " 10 74.9 62.5 \n", + " 11 66.7 54.7 \n", + " 12 68.6 57.5 \n", + " 13 65.2 55.5 \n", + " 14 69.2 56.1 \n", + " 15 64.2 50.7 \n", + " 16 56.2 66.8 \n", + " 17 71.7 56.5 \n", + " 18 74.9 65.6 \n", + " 19 57.6 60.2 \n", + " 20 78.2 72.2 \n", + " 21 67.4 59.3 \n", + " 22 69.3 66.1 \n", + " 23 59.1 59.0 \n", + " 24 82.2 73.3 \n", + " 25 58.4 47.4 \n", + " 26 76.0 59.0 \n", + " 27 54.1 62.1 \n", + " 28 73.0 67.8 \n", + " 29 81.2 78.7 \n", + " 30 64.6 66.7 \n", + " 31 62.2 66.2 \n", + " 32 62.7 61.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.4 \n", + " 1 63.0 \n", + " 2 60.3 \n", + " 3 52.1 \n", + " 4 55.6 \n", + " 5 64.6 \n", + " 6 45.9 \n", + " 7 56.6 \n", + " 8 70.9 \n", + " 9 58.6 \n", + " 10 70.8 \n", + " 11 54.1 \n", + " 12 56.0 \n", + " 13 56.7 \n", + " 14 65.9 \n", + " 15 50.6 \n", + " 16 67.4 \n", + " 17 55.6 \n", + " 18 69.9 \n", + " 19 69.1 \n", + " 20 80.1 \n", + " 21 57.6 \n", + " 22 58.9 \n", + " 23 60.1 \n", + " 24 67.7 \n", + " 25 47.3 \n", + " 26 51.1 \n", + " 27 53.0 \n", + " 28 62.9 \n", + " 29 75.6 \n", + " 30 69.6 \n", + " 31 63.2 \n", + " 32 61.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2011': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 66.5 58.5 \n", + " 1 69.1 63.2 \n", + " 2 66.9 61.9 \n", + " 3 64.7 53.6 \n", + " 4 69.1 60.6 \n", + " 5 66.1 66.2 \n", + " 6 67.5 61.4 \n", + " 7 57.1 53.4 \n", + " 8 69.7 69.5 \n", + " 9 49.4 65.6 \n", + " 10 73.4 66.5 \n", + " 11 69.8 55.4 \n", + " 12 67.2 62.8 \n", + " 13 64.8 60.2 \n", + " 14 67.1 61.0 \n", + " 15 62.1 56.8 \n", + " 16 57.6 68.9 \n", + " 17 71.2 60.8 \n", + " 18 74.3 67.4 \n", + " 19 60.4 64.5 \n", + " 20 78.4 73.3 \n", + " 21 67.7 61.6 \n", + " 22 65.1 65.3 \n", + " 23 54.4 58.0 \n", + " 24 79.6 72.8 \n", + " 25 60.4 47.5 \n", + " 26 73.5 56.1 \n", + " 27 50.3 60.1 \n", + " 28 69.8 66.4 \n", + " 29 78.4 79.3 \n", + " 30 58.8 62.5 \n", + " 31 60.5 65.1 \n", + " 32 63.0 61.0 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.1 \n", + " 1 62.9 \n", + " 2 59.9 \n", + " 3 52.5 \n", + " 4 54.3 \n", + " 5 62.5 \n", + " 6 51.0 \n", + " 7 51.2 \n", + " 8 68.9 \n", + " 9 48.1 \n", + " 10 69.1 \n", + " 11 57.7 \n", + " 12 62.9 \n", + " 13 54.6 \n", + " 14 63.0 \n", + " 15 57.7 \n", + " 16 68.8 \n", + " 17 50.5 \n", + " 18 65.9 \n", + " 19 62.8 \n", + " 20 73.5 \n", + " 21 57.1 \n", + " 22 59.8 \n", + " 23 59.1 \n", + " 24 66.3 \n", + " 25 47.4 \n", + " 26 50.7 \n", + " 27 51.3 \n", + " 28 61.2 \n", + " 29 68.9 \n", + " 30 64.6 \n", + " 31 63.8 \n", + " 32 57.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2012': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 64.4 58.9 \n", + " 1 67.0 64.7 \n", + " 2 66.2 63.7 \n", + " 3 62.0 53.1 \n", + " 4 67.1 61.4 \n", + " 5 62.5 64.3 \n", + " 6 67.0 60.8 \n", + " 7 54.5 58.5 \n", + " 8 66.0 72.1 \n", + " 9 43.5 62.5 \n", + " 10 70.2 67.7 \n", + " 11 68.3 61.9 \n", + " 12 68.3 62.9 \n", + " 13 62.6 60.3 \n", + " 14 68.5 61.3 \n", + " 15 57.8 59.4 \n", + " 16 57.6 69.9 \n", + " 17 69.3 61.9 \n", + " 18 74.2 69.6 \n", + " 19 61.7 66.2 \n", + " 20 79.5 75.4 \n", + " 21 68.0 64.0 \n", + " 22 57.8 63.3 \n", + " 23 50.4 59.4 \n", + " 24 78.8 71.8 \n", + " 25 59.6 46.6 \n", + " 26 73.8 56.9 \n", + " 27 41.5 59.9 \n", + " 28 67.0 67.6 \n", + " 29 77.2 78.6 \n", + " 30 47.1 57.6 \n", + " 31 56.6 63.0 \n", + " 32 62.6 59.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 55.3 \n", + " 1 62.9 \n", + " 2 60.9 \n", + " 3 51.4 \n", + " 4 51.8 \n", + " 5 61.5 \n", + " 6 53.9 \n", + " 7 52.0 \n", + " 8 66.0 \n", + " 9 57.5 \n", + " 10 70.9 \n", + " 11 52.5 \n", + " 12 61.0 \n", + " 13 57.8 \n", + " 14 58.3 \n", + " 15 59.0 \n", + " 16 67.7 \n", + " 17 52.1 \n", + " 18 67.1 \n", + " 19 
63.5 \n", + " 20 73.7 \n", + " 21 59.8 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 60.5 \n", + " 25 46.6 \n", + " 26 46.9 \n", + " 27 49.9 \n", + " 28 59.4 \n", + " 29 69.8 \n", + " 30 57.5 \n", + " 31 64.7 \n", + " 32 53.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2013': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 61.8 59.3 \n", + " 1 65.1 65.2 \n", + " 2 63.8 66.2 \n", + " 3 59.5 52.2 \n", + " 4 63.0 63.8 \n", + " 5 60.1 61.6 \n", + " 6 66.3 62.2 \n", + " 7 55.8 56.4 \n", + " 8 63.2 66.0 \n", + " 9 38.4 58.2 \n", + " 10 69.1 69.9 \n", + " 11 64.4 60.6 \n", + " 12 63.3 66.0 \n", + " 13 60.5 61.9 \n", + " 14 66.0 66.3 \n", + " 15 50.2 63.1 \n", + " 16 56.4 72.8 \n", + " 17 68.0 67.2 \n", + " 18 73.1 71.1 \n", + " 19 61.3 65.7 \n", + " 20 76.4 77.6 \n", + " 21 66.8 65.8 \n", + " 22 52.4 60.5 \n", + " 23 47.8 61.8 \n", + " 24 77.1 74.3 \n", + " 25 58.4 44.8 \n", + " 26 70.7 59.6 \n", + " 27 35.1 59.7 \n", + " 28 62.8 70.3 \n", + " 29 76.4 75.2 \n", + " 30 41.9 47.7 \n", + " 31 53.5 64.7 \n", + " 32 63.5 58.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 55.4 \n", + " 1 62.0 \n", + " 2 61.9 \n", + " 3 51.0 \n", + " 4 52.9 \n", + " 5 61.4 \n", + " 6 54.1 \n", + " 7 48.6 \n", + " 8 63.9 \n", + " 9 60.3 \n", + " 10 70.6 \n", + " 11 54.4 \n", + " 12 60.4 \n", + " 13 57.8 \n", + " 14 63.2 \n", + " 15 60.9 \n", + " 16 65.5 \n", + " 17 56.9 \n", + " 18 71.0 \n", + " 19 55.5 \n", + " 20 72.9 \n", + " 21 60.0 \n", + " 22 61.1 \n", + " 23 51.7 \n", + " 24 63.1 \n", + " 25 46.1 \n", + " 26 48.2 \n", + " 27 51.8 \n", + " 28 59.4 \n", + " 29 67.8 \n", + " 30 60.5 \n", + " 31 65.1 \n", + " 32 53.9 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2014': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 58.7 59.6 \n", + " 1 60.7 65.6 \n", + " 2 60.4 68.1 \n", + " 3 56.4 51.9 \n", + " 4 61.2 64.5 \n", + " 5 56.0 58.6 \n", + " 6 57.7 63.7 \n", + " 7 53.6 55.5 \n", + " 8 61.6 67.2 \n", + " 9 35.5 54.8 \n", + " 10 64.8 72.0 \n", + " 11 60.4 60.3 \n", + " 12 56.9 65.2 \n", + " 13 59.0 63.6 \n", + " 14 62.1 68.2 \n", + " 15 42.5 63.9 \n", + " 16 52.5 74.8 \n", + " 17 65.0 69.4 \n", + " 18 71.2 72.7 \n", + " 19 60.1 69.4 \n", + " 20 71.9 81.3 \n", + " 21 64.4 67.9 \n", + " 22 46.0 56.6 \n", + " 23 45.6 63.0 \n", + " 24 75.3 73.7 \n", + " 25 56.1 44.9 \n", + " 26 73.2 61.2 \n", + " 27 30.0 57.5 \n", + " 28 58.9 70.9 \n", + " 29 75.1 73.4 \n", + " 30 35.0 37.8 \n", + " 31 46.7 62.0 \n", + " 32 62.9 61.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 54.5 \n", + " 1 58.9 \n", + " 2 64.7 \n", + " 3 49.3 \n", + " 4 52.4 \n", + " 5 61.0 \n", + " 6 52.9 \n", + " 7 49.4 \n", + " 8 67.0 \n", + " 9 64.4 \n", + " 10 63.8 \n", + " 11 57.8 \n", + " 12 60.8 \n", + " 13 61.1 \n", + " 14 63.6 \n", + " 15 62.2 \n", + " 16 69.0 \n", + " 17 60.3 \n", + " 18 68.6 \n", + " 19 61.3 \n", + " 20 74.1 \n", + " 21 67.8 \n", + " 22 60.5 \n", + " 23 53.9 \n", + " 24 63.1 \n", + " 25 43.5 \n", + " 26 48.4 \n", + " 27 50.4 \n", + " 28 59.0 \n", + " 29 66.4 \n", + " 30 58.7 \n", + " 31 64.4 \n", + " 32 54.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2015': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 55.8 59.2 \n", + " 1 57.8 66.0 \n", + " 2 57.2 67.6 \n", + " 3 53.8 51.1 \n", + " 4 57.6 63.6 \n", + " 5 53.3 56.2 \n", + " 6 53.0 62.4 \n", + " 7 50.5 54.1 \n", + " 8 59.2 65.1 \n", + " 9 32.1 54.7 \n", + " 10 62.8 73.7 \n", + " 11 55.7 61.7 \n", + " 12 53.1 64.9 \n", + " 13 55.3 63.7 \n", + " 14 60.1 67.5 \n", + " 15 36.3 60.4 \n", + " 16 49.0 74.2 \n", + " 17 62.9 70.4 \n", + " 18 69.3 73.1 \n", + " 19 58.1 69.9 \n", + " 20 68.2 81.0 \n", + " 21 62.7 68.6 \n", + " 22 41.7 53.1 \n", + " 23 43.0 62.6 \n", + " 24 72.9 72.3 \n", + " 25 54.5 44.4 \n", + " 26 69.7 61.2 \n", + " 27 26.5 54.1 \n", + " 28 56.0 70.5 \n", + " 29 73.1 69.4 \n", + " 30 30.8 34.0 \n", + " 31 42.7 60.6 \n", + " 32 62.1 59.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.7 \n", + " 1 58.4 \n", + " 2 64.4 \n", + " 3 48.3 \n", + " 4 52.0 \n", + " 5 58.3 \n", + " 6 53.3 \n", + " 7 49.6 \n", + " 8 68.1 \n", + " 9 54.5 \n", + " 10 65.6 \n", + " 11 55.5 \n", + " 12 58.9 \n", + " 13 61.0 \n", + " 14 65.6 \n", + " 15 62.6 \n", + " 16 69.1 \n", + " 17 60.4 \n", + " 18 66.0 \n", + " 19 
57.8 \n", + " 20 73.7 \n", + " 21 67.6 \n", + " 22 60.9 \n", + " 23 54.2 \n", + " 24 62.0 \n", + " 25 41.7 \n", + " 26 48.0 \n", + " 27 50.1 \n", + " 28 58.0 \n", + " 29 67.2 \n", + " 30 52.3 \n", + " 31 61.6 \n", + " 32 52.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2016': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 53.5 58.2 \n", + " 1 55.7 65.6 \n", + " 2 54.3 66.9 \n", + " 3 51.4 49.7 \n", + " 4 56.9 61.2 \n", + " 5 51.0 55.3 \n", + " 6 47.2 62.7 \n", + " 7 46.8 55.5 \n", + " 8 57.7 63.3 \n", + " 9 28.9 52.9 \n", + " 10 61.3 73.2 \n", + " 11 54.4 62.0 \n", + " 12 52.5 62.7 \n", + " 13 53.1 64.2 \n", + " 14 57.3 67.5 \n", + " 15 30.9 55.6 \n", + " 16 45.4 72.9 \n", + " 17 60.3 71.4 \n", + " 18 65.8 74.0 \n", + " 19 56.1 70.7 \n", + " 20 66.1 79.4 \n", + " 21 61.4 69.9 \n", + " 22 39.3 50.9 \n", + " 23 40.7 59.8 \n", + " 24 70.1 70.2 \n", + " 25 51.9 43.3 \n", + " 26 69.5 59.3 \n", + " 27 24.5 47.9 \n", + " 28 54.9 68.4 \n", + " 29 71.9 67.3 \n", + " 30 30.0 31.1 \n", + " 31 40.0 60.1 \n", + " 32 59.7 59.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.9 \n", + " 1 60.4 \n", + " 2 64.6 \n", + " 3 47.8 \n", + " 4 53.6 \n", + " 5 57.3 \n", + " 6 54.4 \n", + " 7 51.1 \n", + " 8 67.3 \n", + " 9 51.1 \n", + " 10 69.7 \n", + " 11 55.5 \n", + " 12 60.9 \n", + " 13 58.7 \n", + " 14 63.5 \n", + " 15 64.1 \n", + " 16 68.3 \n", + " 17 65.5 \n", + " 18 65.4 \n", + " 19 64.6 \n", + " 20 72.0 \n", + " 21 68.0 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 58.3 \n", + " 25 41.2 \n", + " 26 49.1 \n", + " 27 50.7 \n", + " 28 61.0 \n", + " 29 68.0 \n", + " 30 49.8 \n", + " 31 61.6 \n", + " 32 50.8 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2017': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 51.4 56.8 \n", + " 1 53.1 64.8 \n", + " 2 52.2 65.3 \n", + " 3 49.4 48.3 \n", + " 4 54.6 59.2 \n", + " 5 48.2 52.5 \n", + " 6 42.7 60.5 \n", + " 7 45.1 54.0 \n", + " 8 54.5 62.9 \n", + " 9 26.2 50.3 \n", + " 10 59.9 72.7 \n", + " 11 53.8 60.4 \n", + " 12 44.1 61.6 \n", + " 13 50.5 63.3 \n", + " 14 54.4 63.7 \n", + " 15 28.4 50.8 \n", + " 16 43.2 69.6 \n", + " 17 59.1 71.3 \n", + " 18 62.6 73.2 \n", + " 19 55.1 69.8 \n", + " 20 62.3 78.4 \n", + " 21 59.6 70.4 \n", + " 22 36.8 49.4 \n", + " 23 37.9 58.2 \n", + " 24 68.0 67.8 \n", + " 25 50.8 41.2 \n", + " 26 66.0 58.0 \n", + " 27 23.0 44.4 \n", + " 28 53.4 66.9 \n", + " 29 71.5 68.3 \n", + " 30 29.3 33.3 \n", + " 31 37.3 54.6 \n", + " 32 56.8 56.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.1 \n", + " 1 59.9 \n", + " 2 61.8 \n", + " 3 46.7 \n", + " 4 54.9 \n", + " 5 55.5 \n", + " 6 52.2 \n", + " 7 48.1 \n", + " 8 64.1 \n", + " 9 51.2 \n", + " 10 69.8 \n", + " 11 54.6 \n", + " 12 59.5 \n", + " 13 56.2 \n", + " 14 59.9 \n", + " 15 60.8 \n", + " 16 67.9 \n", + " 17 66.9 \n", + " 18 65.2 \n", + " 19 52.6 \n", + " 20 75.2 \n", + " 21 65.7 \n", + " 22 57.4 \n", + " 23 51.5 \n", + " 24 58.6 \n", + " 25 39.6 \n", + " 26 51.2 \n", + " 27 50.5 \n", + " 28 62.0 \n", + " 29 65.6 \n", + " 30 46.4 \n", + " 31 60.5 \n", + " 32 50.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2018': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 48.4 55.0 \n", + " 1 49.9 62.6 \n", + " 2 48.9 63.4 \n", + " 3 46.6 46.8 \n", + " 4 52.5 57.0 \n", + " 5 44.1 49.8 \n", + " 6 36.6 58.4 \n", + " 7 41.4 51.5 \n", + " 8 51.6 60.7 \n", + " 9 25.2 45.0 \n", + " 10 57.7 70.7 \n", + " 11 51.3 60.2 \n", + " 12 38.7 57.7 \n", + " 13 47.7 61.2 \n", + " 14 50.9 62.2 \n", + " 15 24.6 46.0 \n", + " 16 41.0 67.5 \n", + " 17 57.0 70.7 \n", + " 18 57.0 70.8 \n", + " 19 50.1 68.2 \n", + " 20 60.8 76.4 \n", + " 21 56.8 69.9 \n", + " 22 34.0 48.1 \n", + " 23 34.5 55.4 \n", + " 24 64.7 66.6 \n", + " 25 48.3 38.8 \n", + " 26 63.0 55.2 \n", + " 27 21.7 41.4 \n", + " 28 51.9 65.4 \n", + " 29 67.4 67.6 \n", + " 30 27.9 34.6 \n", + " 31 32.9 50.0 \n", + " 32 54.3 53.8 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.2 \n", + " 1 61.0 \n", + " 2 61.2 \n", + " 3 47.0 \n", + " 4 54.1 \n", + " 5 53.2 \n", + " 6 51.2 \n", + " 7 48.4 \n", + " 8 61.4 \n", + " 9 52.5 \n", + " 10 70.6 \n", + " 11 56.1 \n", + " 12 58.8 \n", + " 13 56.3 \n", + " 14 55.7 \n", + " 15 57.4 \n", + " 16 67.7 \n", + " 17 66.1 \n", + " 18 63.2 \n", + " 19 
60.0 \n", + " 20 75.8 \n", + " 21 63.9 \n", + " 22 55.4 \n", + " 23 52.4 \n", + " 24 60.3 \n", + " 25 38.8 \n", + " 26 49.9 \n", + " 27 47.4 \n", + " 28 63.0 \n", + " 29 62.6 \n", + " 30 43.8 \n", + " 31 58.2 \n", + " 32 47.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2019': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 44.9 53.3 \n", + " 1 46.8 60.9 \n", + " 2 45.1 61.3 \n", + " 3 42.6 45.2 \n", + " 4 50.1 55.0 \n", + " 5 41.1 48.3 \n", + " 6 30.9 52.9 \n", + " 7 36.9 49.6 \n", + " 8 49.0 59.5 \n", + " 9 24.8 43.4 \n", + " 10 55.1 69.7 \n", + " 11 46.4 59.8 \n", + " 12 35.5 55.8 \n", + " 13 44.7 59.9 \n", + " 14 45.6 62.2 \n", + " 15 21.6 40.5 \n", + " 16 37.6 64.8 \n", + " 17 53.8 70.6 \n", + " 18 51.2 68.4 \n", + " 19 45.0 65.0 \n", + " 20 57.4 75.1 \n", + " 21 53.5 69.5 \n", + " 22 30.7 45.6 \n", + " 23 30.8 53.4 \n", + " 24 60.4 65.9 \n", + " 25 43.9 37.5 \n", + " 26 61.6 51.5 \n", + " 27 20.0 38.7 \n", + " 28 49.1 64.2 \n", + " 29 65.0 66.2 \n", + " 30 27.5 33.9 \n", + " 31 30.2 47.7 \n", + " 32 44.8 50.5 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 51.1 \n", + " 1 59.3 \n", + " 2 59.4 \n", + " 3 44.7 \n", + " 4 50.9 \n", + " 5 51.4 \n", + " 6 48.3 \n", + " 7 46.7 \n", + " 8 62.3 \n", + " 9 47.7 \n", + " 10 68.5 \n", + " 11 51.7 \n", + " 12 56.8 \n", + " 13 54.8 \n", + " 14 55.8 \n", + " 15 52.0 \n", + " 16 66.1 \n", + " 17 64.5 \n", + " 18 65.5 \n", + " 19 59.3 \n", + " 20 74.6 \n", + " 21 63.1 \n", + " 22 50.7 \n", + " 23 50.2 \n", + " 24 59.5 \n", + " 25 36.9 \n", + " 26 45.2 \n", + " 27 43.0 \n", + " 28 59.4 \n", + " 29 59.2 \n", + " 30 43.9 \n", + " 31 56.0 \n", + " 32 45.1 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2020': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 41.7 51.6 \n", + " 1 44.0 58.8 \n", + " 2 40.9 59.3 \n", + " 3 39.3 43.8 \n", + " 4 48.2 53.7 \n", + " 5 38.1 45.4 \n", + " 6 29.2 47.8 \n", + " 7 34.9 47.4 \n", + " 8 45.9 57.8 \n", + " 9 25.2 37.0 \n", + " 10 52.0 68.6 \n", + " 11 41.1 59.7 \n", + " 12 32.2 52.9 \n", + " 13 41.2 58.6 \n", + " 14 39.8 58.2 \n", + " 15 18.4 37.1 \n", + " 16 33.4 62.5 \n", + " 17 50.3 68.4 \n", + " 18 43.5 65.2 \n", + " 19 39.7 61.2 \n", + " 20 53.9 74.8 \n", + " 21 50.6 68.8 \n", + " 22 28.2 44.7 \n", + " 23 27.9 51.2 \n", + " 24 55.4 64.5 \n", + " 25 40.7 35.4 \n", + " 26 61.5 49.7 \n", + " 27 18.7 37.4 \n", + " 28 46.0 63.8 \n", + " 29 61.7 65.3 \n", + " 30 24.5 35.0 \n", + " 31 29.2 42.5 \n", + " 32 39.9 49.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 50.3 \n", + " 1 58.5 \n", + " 2 58.5 \n", + " 3 44.2 \n", + " 4 48.8 \n", + " 5 49.4 \n", + " 6 49.1 \n", + " 7 46.7 \n", + " 8 58.6 \n", + " 9 48.3 \n", + " 10 67.6 \n", + " 11 52.2 \n", + " 12 54.8 \n", + " 13 52.4 \n", + " 14 55.8 \n", + " 15 49.2 \n", + " 16 62.7 \n", + " 17 62.1 \n", + " 18 63.7 \n", + " 19 58.8 \n", + " 20 71.3 \n", + " 21 64.9 \n", + " 22 49.6 \n", + " 23 49.4 \n", + " 24 60.4 \n", + " 25 35.7 \n", + " 26 44.3 \n", + " 27 40.0 \n", + " 28 58.0 \n", + " 29 59.3 \n", + " 30 42.7 \n", + " 31 51.9 \n", + " 32 44.3 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2021': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 36.6 49.5 \n", + " 1 36.8 57.7 \n", + " 2 34.2 56.9 \n", + " 3 36.2 41.1 \n", + " 4 43.1 51.1 \n", + " 5 33.6 43.3 \n", + " 6 23.4 43.6 \n", + " 7 27.9 46.2 \n", + " 8 39.8 56.7 \n", + " 9 20.5 35.8 \n", + " 10 43.4 68.1 \n", + " 11 34.0 58.8 \n", + " 12 27.0 48.7 \n", + " 13 35.3 56.5 \n", + " 14 33.2 56.3 \n", + " 15 14.6 33.3 \n", + " 16 23.6 60.1 \n", + " 17 42.4 65.7 \n", + " 18 36.4 60.8 \n", + " 19 32.2 57.4 \n", + " 20 44.5 72.2 \n", + " 21 42.9 66.6 \n", + " 22 25.0 40.4 \n", + " 23 20.4 47.6 \n", + " 24 49.3 63.4 \n", + " 25 38.8 32.6 \n", + " 26 60.2 46.1 \n", + " 27 14.6 35.1 \n", + " 28 37.6 61.6 \n", + " 29 56.0 63.4 \n", + " 30 21.1 32.3 \n", + " 31 25.2 39.8 \n", + " 32 36.0 48.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 48.3 \n", + " 1 57.5 \n", + " 2 57.8 \n", + " 3 41.8 \n", + " 4 44.9 \n", + " 5 46.1 \n", + " 6 48.7 \n", + " 7 44.4 \n", + " 8 55.9 \n", + " 9 43.7 \n", + " 10 66.7 \n", + " 11 54.5 \n", + " 12 52.9 \n", + " 13 50.8 \n", + " 14 55.3 \n", + " 15 44.4 \n", + " 16 59.9 \n", + " 17 61.8 \n", + " 18 61.3 \n", + " 19 
58.9 \n", + " 20 68.7 \n", + " 21 68.6 \n", + " 22 45.1 \n", + " 23 47.3 \n", + " 24 57.8 \n", + " 25 34.9 \n", + " 26 40.6 \n", + " 27 37.1 \n", + " 28 53.2 \n", + " 29 57.9 \n", + " 30 40.7 \n", + " 31 46.5 \n", + " 32 43.6 \n", + " 33 NaN \n", + " 34 NaN }" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: 2007\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2008\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2009\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2010\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2011\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2012\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2013\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2014\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio 
Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2015\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2016\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2017\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2018\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2019\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2020\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2021\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = pd.concat(\n", + " [\n", + " df.pipe(\n", + " lambda d: d.loc[(d[\"id_uf\"] == 0)]\n", + " )\n", + " .pipe(\n", + " lambda d: pd.melt(\n", + " d,\n", + " id_vars=[\"id_uf\", \"nome\"],\n", + " value_vars=d.columns.difference([\"id_uf\", \"nome\"]).tolist(), # 
Convert to list\n", + " var_name=\"etapa_ensino\",\n", + " value_name=\"tdi\",\n", + " )\n", + " )\n", + " .assign(ano=ano)\n", + " for ano, df in dfs.items()\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_ufnomeetapa_ensinotdiano
00BrasilEnsino Fundamental – Anos Finais60.62007
10BrasilEnsino Fundamental – Anos Iniciais72.22007
20BrasilEnsino Médio Regular65.32007
00BrasilEnsino Fundamental – Anos Finais54.52008
10BrasilEnsino Fundamental – Anos Iniciais66.52008
20BrasilEnsino Médio Regular57.32008
00BrasilEnsino Fundamental – Anos Finais54.82009
10BrasilEnsino Fundamental – Anos Iniciais68.42009
20BrasilEnsino Médio Regular56.02009
00BrasilEnsino Fundamental – Anos Finais57.22010
10BrasilEnsino Fundamental – Anos Iniciais67.52010
20BrasilEnsino Médio Regular56.42010
00BrasilEnsino Fundamental – Anos Finais58.52011
10BrasilEnsino Fundamental – Anos Iniciais66.52011
20BrasilEnsino Médio Regular56.12011
00BrasilEnsino Fundamental – Anos Finais58.92012
10BrasilEnsino Fundamental – Anos Iniciais64.42012
20BrasilEnsino Médio Regular55.32012
00BrasilEnsino Fundamental – Anos Finais59.32013
10BrasilEnsino Fundamental – Anos Iniciais61.82013
20BrasilEnsino Médio Regular55.42013
00BrasilEnsino Fundamental – Anos Finais59.62014
10BrasilEnsino Fundamental – Anos Iniciais58.72014
20BrasilEnsino Médio Regular54.52014
00BrasilEnsino Fundamental – Anos Finais59.22015
10BrasilEnsino Fundamental – Anos Iniciais55.82015
20BrasilEnsino Médio Regular53.72015
00BrasilEnsino Fundamental – Anos Finais58.22016
10BrasilEnsino Fundamental – Anos Iniciais53.52016
20BrasilEnsino Médio Regular53.92016
00BrasilEnsino Fundamental – Anos Finais56.82017
10BrasilEnsino Fundamental – Anos Iniciais51.42017
20BrasilEnsino Médio Regular53.12017
00BrasilEnsino Fundamental – Anos Finais55.02018
10BrasilEnsino Fundamental – Anos Iniciais48.42018
20BrasilEnsino Médio Regular53.22018
00BrasilEnsino Fundamental – Anos Finais53.32019
10BrasilEnsino Fundamental – Anos Iniciais44.92019
20BrasilEnsino Médio Regular51.12019
00BrasilEnsino Fundamental – Anos Finais51.62020
10BrasilEnsino Fundamental – Anos Iniciais41.72020
20BrasilEnsino Médio Regular50.32020
00BrasilEnsino Fundamental – Anos Finais49.52021
10BrasilEnsino Fundamental – Anos Iniciais36.62021
20BrasilEnsino Médio Regular48.32021
\n", + "
" + ], + "text/plain": [ + " id_uf nome etapa_ensino tdi ano\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 60.6 2007\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 72.2 2007\n", + "2 0 Brasil Ensino Médio Regular 65.3 2007\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 54.5 2008\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 66.5 2008\n", + "2 0 Brasil Ensino Médio Regular 57.3 2008\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 54.8 2009\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 68.4 2009\n", + "2 0 Brasil Ensino Médio Regular 56.0 2009\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 57.2 2010\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 67.5 2010\n", + "2 0 Brasil Ensino Médio Regular 56.4 2010\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 58.5 2011\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 66.5 2011\n", + "2 0 Brasil Ensino Médio Regular 56.1 2011\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 58.9 2012\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 64.4 2012\n", + "2 0 Brasil Ensino Médio Regular 55.3 2012\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 59.3 2013\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 61.8 2013\n", + "2 0 Brasil Ensino Médio Regular 55.4 2013\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 59.6 2014\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 58.7 2014\n", + "2 0 Brasil Ensino Médio Regular 54.5 2014\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 59.2 2015\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 55.8 2015\n", + "2 0 Brasil Ensino Médio Regular 53.7 2015\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 58.2 2016\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 53.5 2016\n", + "2 0 Brasil Ensino Médio Regular 53.9 2016\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 56.8 2017\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 51.4 2017\n", + "2 0 Brasil Ensino Médio Regular 53.1 2017\n", + "0 0 Brasil Ensino 
Fundamental – Anos Finais 55.0 2018\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 48.4 2018\n", + "2 0 Brasil Ensino Médio Regular 53.2 2018\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 53.3 2019\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 44.9 2019\n", + "2 0 Brasil Ensino Médio Regular 51.1 2019\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 51.6 2020\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 41.7 2020\n", + "2 0 Brasil Ensino Médio Regular 50.3 2020\n", + "0 0 Brasil Ensino Fundamental – Anos Finais 49.5 2021\n", + "1 0 Brasil Ensino Fundamental – Anos Iniciais 36.6 2021\n", + "2 0 Brasil Ensino Médio Regular 48.3 2021" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31,\n", + " 32, 33, 35, 41, 42, 43, 50, 51, 52, 53], dtype=object)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe['id_uf'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = melted_dataframe.drop(\n", + " columns=['id_uf', 'nome']\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = melted_dataframe[\n", + " [\n", + " \"ano\",\n", + " \"etapa_ensino\",\n", + " \"tdi\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anoetapa_ensinotdi
02007Ensino Fundamental – Anos Finais60.6
12007Ensino Fundamental – Anos Iniciais72.2
22007Ensino Médio Regular65.3
02008Ensino Fundamental – Anos Finais54.5
12008Ensino Fundamental – Anos Iniciais66.5
22008Ensino Médio Regular57.3
02009Ensino Fundamental – Anos Finais54.8
12009Ensino Fundamental – Anos Iniciais68.4
22009Ensino Médio Regular56.0
02010Ensino Fundamental – Anos Finais57.2
12010Ensino Fundamental – Anos Iniciais67.5
22010Ensino Médio Regular56.4
02011Ensino Fundamental – Anos Finais58.5
12011Ensino Fundamental – Anos Iniciais66.5
22011Ensino Médio Regular56.1
02012Ensino Fundamental – Anos Finais58.9
12012Ensino Fundamental – Anos Iniciais64.4
22012Ensino Médio Regular55.3
02013Ensino Fundamental – Anos Finais59.3
12013Ensino Fundamental – Anos Iniciais61.8
22013Ensino Médio Regular55.4
02014Ensino Fundamental – Anos Finais59.6
12014Ensino Fundamental – Anos Iniciais58.7
22014Ensino Médio Regular54.5
02015Ensino Fundamental – Anos Finais59.2
12015Ensino Fundamental – Anos Iniciais55.8
22015Ensino Médio Regular53.7
02016Ensino Fundamental – Anos Finais58.2
12016Ensino Fundamental – Anos Iniciais53.5
22016Ensino Médio Regular53.9
02017Ensino Fundamental – Anos Finais56.8
12017Ensino Fundamental – Anos Iniciais51.4
22017Ensino Médio Regular53.1
02018Ensino Fundamental – Anos Finais55.0
12018Ensino Fundamental – Anos Iniciais48.4
22018Ensino Médio Regular53.2
02019Ensino Fundamental – Anos Finais53.3
12019Ensino Fundamental – Anos Iniciais44.9
22019Ensino Médio Regular51.1
02020Ensino Fundamental – Anos Finais51.6
12020Ensino Fundamental – Anos Iniciais41.7
22020Ensino Médio Regular50.3
02021Ensino Fundamental – Anos Finais49.5
12021Ensino Fundamental – Anos Iniciais36.6
22021Ensino Médio Regular48.3
\n", + "
" + ], + "text/plain": [ + " ano etapa_ensino tdi\n", + "0 2007 Ensino Fundamental – Anos Finais 60.6\n", + "1 2007 Ensino Fundamental – Anos Iniciais 72.2\n", + "2 2007 Ensino Médio Regular 65.3\n", + "0 2008 Ensino Fundamental – Anos Finais 54.5\n", + "1 2008 Ensino Fundamental – Anos Iniciais 66.5\n", + "2 2008 Ensino Médio Regular 57.3\n", + "0 2009 Ensino Fundamental – Anos Finais 54.8\n", + "1 2009 Ensino Fundamental – Anos Iniciais 68.4\n", + "2 2009 Ensino Médio Regular 56.0\n", + "0 2010 Ensino Fundamental – Anos Finais 57.2\n", + "1 2010 Ensino Fundamental – Anos Iniciais 67.5\n", + "2 2010 Ensino Médio Regular 56.4\n", + "0 2011 Ensino Fundamental – Anos Finais 58.5\n", + "1 2011 Ensino Fundamental – Anos Iniciais 66.5\n", + "2 2011 Ensino Médio Regular 56.1\n", + "0 2012 Ensino Fundamental – Anos Finais 58.9\n", + "1 2012 Ensino Fundamental – Anos Iniciais 64.4\n", + "2 2012 Ensino Médio Regular 55.3\n", + "0 2013 Ensino Fundamental – Anos Finais 59.3\n", + "1 2013 Ensino Fundamental – Anos Iniciais 61.8\n", + "2 2013 Ensino Médio Regular 55.4\n", + "0 2014 Ensino Fundamental – Anos Finais 59.6\n", + "1 2014 Ensino Fundamental – Anos Iniciais 58.7\n", + "2 2014 Ensino Médio Regular 54.5\n", + "0 2015 Ensino Fundamental – Anos Finais 59.2\n", + "1 2015 Ensino Fundamental – Anos Iniciais 55.8\n", + "2 2015 Ensino Médio Regular 53.7\n", + "0 2016 Ensino Fundamental – Anos Finais 58.2\n", + "1 2016 Ensino Fundamental – Anos Iniciais 53.5\n", + "2 2016 Ensino Médio Regular 53.9\n", + "0 2017 Ensino Fundamental – Anos Finais 56.8\n", + "1 2017 Ensino Fundamental – Anos Iniciais 51.4\n", + "2 2017 Ensino Médio Regular 53.1\n", + "0 2018 Ensino Fundamental – Anos Finais 55.0\n", + "1 2018 Ensino Fundamental – Anos Iniciais 48.4\n", + "2 2018 Ensino Médio Regular 53.2\n", + "0 2019 Ensino Fundamental – Anos Finais 53.3\n", + "1 2019 Ensino Fundamental – Anos Iniciais 44.9\n", + "2 2019 Ensino Médio Regular 51.1\n", + "0 2020 Ensino Fundamental – Anos Finais 
51.6\n", + "1 2020 Ensino Fundamental – Anos Iniciais 41.7\n", + "2 2020 Ensino Médio Regular 50.3\n", + "0 2021 Ensino Fundamental – Anos Finais 49.5\n", + "1 2021 Ensino Fundamental – Anos Iniciais 36.6\n", + "2 2021 Ensino Médio Regular 48.3" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_brasil_distorcao_idade_serie\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "melted_dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb new file mode 100644 index 00000000..ca3a594c --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_brasil_taxa_rendimento.ipynb @@ -0,0 +1,1661 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + 
"os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 8) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BRASIL_REGIOES_UFS ': NU_ANO_CENSO UNIDGEO \\\n", + " 0 2007 Brasil \n", + " 1 2007 Norte \n", + " 2 2007 Nordeste \n", + " 3 2007 Sudeste \n", + " 4 2007 Sul \n", + " .. ... ... \n", + " 492 2021 Mato Grosso \n", + " 493 2021 Goiás \n", + " 494 2021 Distrito Federal \n", + " 495 NaN NaN \n", + " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n", + " \n", + " NO_CATEGORIA NO_DEPENDENCIA 1_CAT_FUN 1_CAT_FUN_AI 1_CAT_FUN_AF \\\n", + " 0 Total Total 73.1 71.6 78.7 \n", + " 1 Total Total 63.2 61.4 75.5 \n", + " 2 Total Total 67.0 65.7 73.0 \n", + " 3 Total Total 77.0 75.6 81.5 \n", + " 4 Total Total 76.8 76.4 78.1 \n", + " .. ... ... ... ... ... 
\n", + " 492 Total Total 96.3 96.2 96.4 \n", + " 493 Total Total 96.5 95.2 97.6 \n", + " 494 Total Total 89.1 83.1 95.7 \n", + " 495 NaN NaN NaN NaN NaN \n", + " 496 NaN NaN NaN NaN NaN \n", + " \n", + " 1_CAT_MED 2_CAT_FUN 2_CAT_FUN_AI 2_CAT_FUN_AF 2_CAT_MED 3_CAT_FUN \\\n", + " 0 79.6 22.1 23.8 16.2 13.0 4.8 \n", + " 1 77.5 28.6 30.4 16.6 11.1 8.2 \n", + " 2 77.0 24.7 26.2 17.9 11.3 8.3 \n", + " 3 79.6 19.9 21.4 14.7 14.9 3.1 \n", + " 4 81.4 20.4 21.0 18.3 11.5 2.8 \n", + " .. ... ... ... ... ... ... \n", + " 492 81.9 3.1 3.4 2.7 13.4 0.6 \n", + " 493 97.8 2.8 4.2 1.6 1.3 0.7 \n", + " 494 92.2 10.6 16.4 4.1 7.3 0.3 \n", + " 495 NaN NaN NaN NaN NaN NaN \n", + " 496 NaN NaN NaN NaN NaN NaN \n", + " \n", + " 3_CAT_FUN_AI 3_CAT_FUN_AF 3_CAT_MED \n", + " 0 4.6 5.1 7.4 \n", + " 1 8.2 7.9 11.4 \n", + " 2 8.1 9.1 11.7 \n", + " 3 3.0 3.8 5.5 \n", + " 4 2.6 3.6 7.1 \n", + " .. ... ... ... \n", + " 492 0.4 0.9 4.7 \n", + " 493 0.6 0.8 0.9 \n", + " 494 0.5 0.2 0.5 \n", + " 495 NaN NaN NaN \n", + " 496 NaN NaN NaN \n", + " \n", + " [497 rows x 16 columns]}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: BRASIL_REGIOES_UFS \n", + "Index(['NU_ANO_CENSO', 'UNIDGEO', 'NO_CATEGORIA', 'NO_DEPENDENCIA',\n", + " '1_CAT_FUN', '1_CAT_FUN_AI', '1_CAT_FUN_AF', '1_CAT_MED', '2_CAT_FUN',\n", + " '2_CAT_FUN_AI', '2_CAT_FUN_AF', '2_CAT_MED', '3_CAT_FUN',\n", + " '3_CAT_FUN_AI', '3_CAT_FUN_AF', '3_CAT_MED'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + 
"outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'NU_ANO_CENSO':'ano', \n", + " 'UNIDGEO':'nome_uf',\n", + " '1_CAT_FUN_AI':'taxaaprovacao_anosiniciais', \n", + " '1_CAT_FUN_AF':'taxaaprovacao_anosfinais', \n", + " '1_CAT_MED':'taxaaprovacao_ensinomedio', \n", + " '2_CAT_FUN_AI':'taxareprovacao_anosiniciais', \n", + " '2_CAT_FUN_AF':'taxareprovacao_anosfinais', \n", + " '2_CAT_MED':'taxareprovacao_ensinomedio', \n", + " '3_CAT_FUN_AI':'taxaabandono_anosiniciais', \n", + " '3_CAT_FUN_AF':'taxaabandono_anosfinais', \n", + " '3_CAT_MED' : 'taxaabandono_ensinomedio' \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"NO_\") \n", + " or col.startswith(\"1_\") \n", + " or col.startswith(\"2_\") \n", + " or col.startswith(\"3_\")\n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BRASIL_REGIOES_UFS ': ano nome_uf \\\n", + " 0 2007 Brasil \n", + " 1 2007 Norte \n", + " 2 2007 Nordeste \n", + " 3 2007 Sudeste \n", + " 4 2007 Sul \n", + " .. ... ... \n", + " 492 2021 Mato Grosso \n", + " 493 2021 Goiás \n", + " 494 2021 Distrito Federal \n", + " 495 NaN NaN \n", + " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n", + " \n", + " taxaaprovacao_anosiniciais taxaaprovacao_anosfinais \\\n", + " 0 71.6 78.7 \n", + " 1 61.4 75.5 \n", + " 2 65.7 73.0 \n", + " 3 75.6 81.5 \n", + " 4 76.4 78.1 \n", + " .. ... ... 
\n", + " 492 96.2 96.4 \n", + " 493 95.2 97.6 \n", + " 494 83.1 95.7 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaaprovacao_ensinomedio taxareprovacao_anosiniciais \\\n", + " 0 79.6 23.8 \n", + " 1 77.5 30.4 \n", + " 2 77.0 26.2 \n", + " 3 79.6 21.4 \n", + " 4 81.4 21.0 \n", + " .. ... ... \n", + " 492 81.9 3.4 \n", + " 493 97.8 4.2 \n", + " 494 92.2 16.4 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxareprovacao_anosfinais taxareprovacao_ensinomedio \\\n", + " 0 16.2 13.0 \n", + " 1 16.6 11.1 \n", + " 2 17.9 11.3 \n", + " 3 14.7 14.9 \n", + " 4 18.3 11.5 \n", + " .. ... ... \n", + " 492 2.7 13.4 \n", + " 493 1.6 1.3 \n", + " 494 4.1 7.3 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaabandono_anosiniciais taxaabandono_anosfinais \\\n", + " 0 4.6 5.1 \n", + " 1 8.2 7.9 \n", + " 2 8.1 9.1 \n", + " 3 3.0 3.8 \n", + " 4 2.6 3.6 \n", + " .. ... ... \n", + " 492 0.4 0.9 \n", + " 493 0.6 0.8 \n", + " 494 0.5 0.2 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaabandono_ensinomedio \n", + " 0 7.4 \n", + " 1 11.4 \n", + " 2 11.7 \n", + " 3 5.5 \n", + " 4 7.1 \n", + " .. ... 
\n", + " 492 4.7 \n", + " 493 0.9 \n", + " 494 0.5 \n", + " 495 NaN \n", + " 496 NaN \n", + " \n", + " [497 rows x 11 columns]}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: BRASIL_REGIOES_UFS \n", + "Index(['ano', 'nome_uf', 'taxaaprovacao_anosiniciais',\n", + " 'taxaaprovacao_anosfinais', 'taxaaprovacao_ensinomedio',\n", + " 'taxareprovacao_anosiniciais', 'taxareprovacao_anosfinais',\n", + " 'taxareprovacao_ensinomedio', 'taxaabandono_anosiniciais',\n", + " 'taxaabandono_anosfinais', 'taxaabandono_ensinomedio'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = pd.concat(\n", + " [\n", + " df.pipe(\n", + " lambda d: d.loc[(d[\"nome_uf\"] == 'Brasil' )]\n", + " )\n", + " .pipe(\n", + " lambda d: pd.melt(\n", + " d,\n", + " id_vars=[\"ano\", \"nome_uf\"],\n", + " value_vars=d.columns.difference([\"ano\", \"nome_uf\"]).tolist(), # Convert to list\n", + " var_name=\"metrica\",\n", + " value_name=\"valor\",\n", + " )\n", + " )\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anonome_ufmetricavalor
02007Brasiltaxaabandono_anosfinais5.1
12008Brasiltaxaabandono_anosfinais5.0
22009Brasiltaxaabandono_anosfinais4.3
32010Brasiltaxaabandono_anosfinais4.6
42011Brasiltaxaabandono_anosfinais4.6
...............
1302017Brasiltaxareprovacao_ensinomedio11.2
1312018Brasiltaxareprovacao_ensinomedio11.4
1322019Brasiltaxareprovacao_ensinomedio9.4
1332020Brasiltaxareprovacao_ensinomedio2.2
1342021Brasiltaxareprovacao_ensinomedio3.8
\n", + "

135 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ano nome_uf metrica valor\n", + "0 2007 Brasil taxaabandono_anosfinais 5.1\n", + "1 2008 Brasil taxaabandono_anosfinais 5.0\n", + "2 2009 Brasil taxaabandono_anosfinais 4.3\n", + "3 2010 Brasil taxaabandono_anosfinais 4.6\n", + "4 2011 Brasil taxaabandono_anosfinais 4.6\n", + ".. ... ... ... ...\n", + "130 2017 Brasil taxareprovacao_ensinomedio 11.2\n", + "131 2018 Brasil taxareprovacao_ensinomedio 11.4\n", + "132 2019 Brasil taxareprovacao_ensinomedio 9.4\n", + "133 2020 Brasil taxareprovacao_ensinomedio 2.2\n", + "134 2021 Brasil taxareprovacao_ensinomedio 3.8\n", + "\n", + "[135 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe['etapa_ensino'] = melted_dataframe['metrica'].apply(\n", + " lambda v: v.split('_')[-1]) # Extracts 'anosiniciais', 'anosfinais', or 'ensinomedio'\n", + "melted_dataframe['tipo_metrica'] = melted_dataframe['metrica'].apply(\n", + " lambda v: v.split('_')[0]) # Extracts 'taxaaprovacao', 'taxareprovacao', 'taxaabandono'\n", + "\n", + "# Pivoting the melted DataFrame to get desired columns\n", + "df_final = melted_dataframe.pivot_table(index=['ano', 'nome_uf', 'etapa_ensino'], \n", + " columns='tipo_metrica', \n", + " values='valor').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS_MELTED = {\n", + " 'taxaabandono':'taxa_abandono', \n", + " 'taxaaprovacao':'taxa_aprovacao',\n", + " 'taxareprovacao':'taxa_reprovacao' \n", + "}\n", + "\n", + "etapa_ensino = {\n", + " 'anosiniciais': 'Ensino Fundamental – Anos Iniciais',\n", + " 'anosfinais':'Ensino Fundamental – Anos Finais',\n", + " 'ensinomedio':'Ensino Médio Regular'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tipo_metricaanonome_ufetapa_ensinotaxaabandonotaxaaprovacaotaxareprovacao
02007Brasilanosfinais5.178.716.2
12007Brasilanosiniciais4.671.623.8
22007Brasilensinomedio7.479.613.0
32008Brasilanosfinais5.078.816.2
42008Brasilanosiniciais4.572.423.1
52008Brasilensinomedio7.879.312.9
62009Brasilanosfinais4.381.514.2
72009Brasilanosiniciais3.374.422.3
82009Brasilensinomedio7.080.013.0
92010Brasilanosfinais4.681.114.3
102010Brasilanosiniciais3.373.723.0
112010Brasilensinomedio7.479.413.2
122011Brasilanosfinais4.680.415.0
132011Brasilanosiniciais3.374.522.2
142011Brasilensinomedio7.179.413.5
152012Brasilanosfinais4.780.215.1
162012Brasilanosiniciais3.373.123.6
172012Brasilensinomedio7.180.112.8
182013Brasilanosfinais4.481.713.9
192013Brasilanosiniciais2.975.221.9
202013Brasilensinomedio6.881.711.5
212014Brasilanosfinais4.480.914.7
222014Brasilanosiniciais2.973.723.4
232014Brasilensinomedio6.182.011.9
242015Brasilanosfinais4.381.014.7
252015Brasilanosiniciais2.774.522.8
262015Brasilensinomedio6.282.311.5
272016Brasilanosfinais4.480.515.1
282016Brasilanosiniciais2.774.622.7
292016Brasilensinomedio5.782.511.8
302017Brasilanosfinais4.182.413.5
312017Brasilanosiniciais2.476.720.9
322017Brasilensinomedio6.082.811.2
332018Brasilanosfinais3.883.213.0
342018Brasilanosiniciais2.278.219.6
352018Brasilensinomedio6.182.511.4
362019Brasilanosfinais2.986.210.9
372019Brasilanosiniciais1.781.416.9
382019Brasilensinomedio4.586.19.4
392020Brasilanosfinais1.697.01.4
402020Brasilanosiniciais1.290.88.0
412020Brasilensinomedio2.695.22.2
422021Brasilanosfinais2.594.13.4
432021Brasilanosiniciais1.487.910.7
442021Brasilensinomedio4.591.73.8
\n", + "
" + ], + "text/plain": [ + "tipo_metrica ano nome_uf etapa_ensino taxaabandono taxaaprovacao \\\n", + "0 2007 Brasil anosfinais 5.1 78.7 \n", + "1 2007 Brasil anosiniciais 4.6 71.6 \n", + "2 2007 Brasil ensinomedio 7.4 79.6 \n", + "3 2008 Brasil anosfinais 5.0 78.8 \n", + "4 2008 Brasil anosiniciais 4.5 72.4 \n", + "5 2008 Brasil ensinomedio 7.8 79.3 \n", + "6 2009 Brasil anosfinais 4.3 81.5 \n", + "7 2009 Brasil anosiniciais 3.3 74.4 \n", + "8 2009 Brasil ensinomedio 7.0 80.0 \n", + "9 2010 Brasil anosfinais 4.6 81.1 \n", + "10 2010 Brasil anosiniciais 3.3 73.7 \n", + "11 2010 Brasil ensinomedio 7.4 79.4 \n", + "12 2011 Brasil anosfinais 4.6 80.4 \n", + "13 2011 Brasil anosiniciais 3.3 74.5 \n", + "14 2011 Brasil ensinomedio 7.1 79.4 \n", + "15 2012 Brasil anosfinais 4.7 80.2 \n", + "16 2012 Brasil anosiniciais 3.3 73.1 \n", + "17 2012 Brasil ensinomedio 7.1 80.1 \n", + "18 2013 Brasil anosfinais 4.4 81.7 \n", + "19 2013 Brasil anosiniciais 2.9 75.2 \n", + "20 2013 Brasil ensinomedio 6.8 81.7 \n", + "21 2014 Brasil anosfinais 4.4 80.9 \n", + "22 2014 Brasil anosiniciais 2.9 73.7 \n", + "23 2014 Brasil ensinomedio 6.1 82.0 \n", + "24 2015 Brasil anosfinais 4.3 81.0 \n", + "25 2015 Brasil anosiniciais 2.7 74.5 \n", + "26 2015 Brasil ensinomedio 6.2 82.3 \n", + "27 2016 Brasil anosfinais 4.4 80.5 \n", + "28 2016 Brasil anosiniciais 2.7 74.6 \n", + "29 2016 Brasil ensinomedio 5.7 82.5 \n", + "30 2017 Brasil anosfinais 4.1 82.4 \n", + "31 2017 Brasil anosiniciais 2.4 76.7 \n", + "32 2017 Brasil ensinomedio 6.0 82.8 \n", + "33 2018 Brasil anosfinais 3.8 83.2 \n", + "34 2018 Brasil anosiniciais 2.2 78.2 \n", + "35 2018 Brasil ensinomedio 6.1 82.5 \n", + "36 2019 Brasil anosfinais 2.9 86.2 \n", + "37 2019 Brasil anosiniciais 1.7 81.4 \n", + "38 2019 Brasil ensinomedio 4.5 86.1 \n", + "39 2020 Brasil anosfinais 1.6 97.0 \n", + "40 2020 Brasil anosiniciais 1.2 90.8 \n", + "41 2020 Brasil ensinomedio 2.6 95.2 \n", + "42 2021 Brasil anosfinais 2.5 94.1 \n", + "43 2021 Brasil 
anosiniciais 1.4 87.9 \n", + "44 2021 Brasil ensinomedio 4.5 91.7 \n", + "\n", + "tipo_metrica taxareprovacao \n", + "0 16.2 \n", + "1 23.8 \n", + "2 13.0 \n", + "3 16.2 \n", + "4 23.1 \n", + "5 12.9 \n", + "6 14.2 \n", + "7 22.3 \n", + "8 13.0 \n", + "9 14.3 \n", + "10 23.0 \n", + "11 13.2 \n", + "12 15.0 \n", + "13 22.2 \n", + "14 13.5 \n", + "15 15.1 \n", + "16 23.6 \n", + "17 12.8 \n", + "18 13.9 \n", + "19 21.9 \n", + "20 11.5 \n", + "21 14.7 \n", + "22 23.4 \n", + "23 11.9 \n", + "24 14.7 \n", + "25 22.8 \n", + "26 11.5 \n", + "27 15.1 \n", + "28 22.7 \n", + "29 11.8 \n", + "30 13.5 \n", + "31 20.9 \n", + "32 11.2 \n", + "33 13.0 \n", + "34 19.6 \n", + "35 11.4 \n", + "36 10.9 \n", + "37 16.9 \n", + "38 9.4 \n", + "39 1.4 \n", + "40 8.0 \n", + "41 2.2 \n", + "42 3.4 \n", + "43 10.7 \n", + "44 3.8 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "df_final = df_final.rename(columns=RENAME_COLUMNS_MELTED)\n", + "df_final['etapa_ensino'] = df_final['etapa_ensino'].replace(etapa_ensino)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Rename the 'sigla' column to 'sigla_uf' and drop the 'nome' column\n", + "df_final = df_final.drop(columns=['nome_uf'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "df_final = df_final[['ano', 'etapa_ensino', 'taxa_aprovacao','taxa_reprovacao','taxa_abandono']]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tipo_metricaanoetapa_ensinotaxa_aprovacaotaxa_reprovacaotaxa_abandono
02007Ensino Fundamental – Anos Finais78.716.25.1
12007Ensino Fundamental – Anos Iniciais71.623.84.6
22007Ensino Médio Regular79.613.07.4
32008Ensino Fundamental – Anos Finais78.816.25.0
42008Ensino Fundamental – Anos Iniciais72.423.14.5
52008Ensino Médio Regular79.312.97.8
62009Ensino Fundamental – Anos Finais81.514.24.3
72009Ensino Fundamental – Anos Iniciais74.422.33.3
82009Ensino Médio Regular80.013.07.0
92010Ensino Fundamental – Anos Finais81.114.34.6
102010Ensino Fundamental – Anos Iniciais73.723.03.3
112010Ensino Médio Regular79.413.27.4
122011Ensino Fundamental – Anos Finais80.415.04.6
132011Ensino Fundamental – Anos Iniciais74.522.23.3
142011Ensino Médio Regular79.413.57.1
152012Ensino Fundamental – Anos Finais80.215.14.7
162012Ensino Fundamental – Anos Iniciais73.123.63.3
172012Ensino Médio Regular80.112.87.1
182013Ensino Fundamental – Anos Finais81.713.94.4
192013Ensino Fundamental – Anos Iniciais75.221.92.9
202013Ensino Médio Regular81.711.56.8
212014Ensino Fundamental – Anos Finais80.914.74.4
222014Ensino Fundamental – Anos Iniciais73.723.42.9
232014Ensino Médio Regular82.011.96.1
242015Ensino Fundamental – Anos Finais81.014.74.3
252015Ensino Fundamental – Anos Iniciais74.522.82.7
262015Ensino Médio Regular82.311.56.2
272016Ensino Fundamental – Anos Finais80.515.14.4
282016Ensino Fundamental – Anos Iniciais74.622.72.7
292016Ensino Médio Regular82.511.85.7
302017Ensino Fundamental – Anos Finais82.413.54.1
312017Ensino Fundamental – Anos Iniciais76.720.92.4
322017Ensino Médio Regular82.811.26.0
332018Ensino Fundamental – Anos Finais83.213.03.8
342018Ensino Fundamental – Anos Iniciais78.219.62.2
352018Ensino Médio Regular82.511.46.1
362019Ensino Fundamental – Anos Finais86.210.92.9
372019Ensino Fundamental – Anos Iniciais81.416.91.7
382019Ensino Médio Regular86.19.44.5
392020Ensino Fundamental – Anos Finais97.01.41.6
402020Ensino Fundamental – Anos Iniciais90.88.01.2
412020Ensino Médio Regular95.22.22.6
422021Ensino Fundamental – Anos Finais94.13.42.5
432021Ensino Fundamental – Anos Iniciais87.910.71.4
442021Ensino Médio Regular91.73.84.5
\n", + "
" + ], + "text/plain": [ + "tipo_metrica ano etapa_ensino taxa_aprovacao \\\n", + "0 2007 Ensino Fundamental – Anos Finais 78.7 \n", + "1 2007 Ensino Fundamental – Anos Iniciais 71.6 \n", + "2 2007 Ensino Médio Regular 79.6 \n", + "3 2008 Ensino Fundamental – Anos Finais 78.8 \n", + "4 2008 Ensino Fundamental – Anos Iniciais 72.4 \n", + "5 2008 Ensino Médio Regular 79.3 \n", + "6 2009 Ensino Fundamental – Anos Finais 81.5 \n", + "7 2009 Ensino Fundamental – Anos Iniciais 74.4 \n", + "8 2009 Ensino Médio Regular 80.0 \n", + "9 2010 Ensino Fundamental – Anos Finais 81.1 \n", + "10 2010 Ensino Fundamental – Anos Iniciais 73.7 \n", + "11 2010 Ensino Médio Regular 79.4 \n", + "12 2011 Ensino Fundamental – Anos Finais 80.4 \n", + "13 2011 Ensino Fundamental – Anos Iniciais 74.5 \n", + "14 2011 Ensino Médio Regular 79.4 \n", + "15 2012 Ensino Fundamental – Anos Finais 80.2 \n", + "16 2012 Ensino Fundamental – Anos Iniciais 73.1 \n", + "17 2012 Ensino Médio Regular 80.1 \n", + "18 2013 Ensino Fundamental – Anos Finais 81.7 \n", + "19 2013 Ensino Fundamental – Anos Iniciais 75.2 \n", + "20 2013 Ensino Médio Regular 81.7 \n", + "21 2014 Ensino Fundamental – Anos Finais 80.9 \n", + "22 2014 Ensino Fundamental – Anos Iniciais 73.7 \n", + "23 2014 Ensino Médio Regular 82.0 \n", + "24 2015 Ensino Fundamental – Anos Finais 81.0 \n", + "25 2015 Ensino Fundamental – Anos Iniciais 74.5 \n", + "26 2015 Ensino Médio Regular 82.3 \n", + "27 2016 Ensino Fundamental – Anos Finais 80.5 \n", + "28 2016 Ensino Fundamental – Anos Iniciais 74.6 \n", + "29 2016 Ensino Médio Regular 82.5 \n", + "30 2017 Ensino Fundamental – Anos Finais 82.4 \n", + "31 2017 Ensino Fundamental – Anos Iniciais 76.7 \n", + "32 2017 Ensino Médio Regular 82.8 \n", + "33 2018 Ensino Fundamental – Anos Finais 83.2 \n", + "34 2018 Ensino Fundamental – Anos Iniciais 78.2 \n", + "35 2018 Ensino Médio Regular 82.5 \n", + "36 2019 Ensino Fundamental – Anos Finais 86.2 \n", + "37 2019 Ensino Fundamental – Anos Iniciais 81.4 
\n", + "38 2019 Ensino Médio Regular 86.1 \n", + "39 2020 Ensino Fundamental – Anos Finais 97.0 \n", + "40 2020 Ensino Fundamental – Anos Iniciais 90.8 \n", + "41 2020 Ensino Médio Regular 95.2 \n", + "42 2021 Ensino Fundamental – Anos Finais 94.1 \n", + "43 2021 Ensino Fundamental – Anos Iniciais 87.9 \n", + "44 2021 Ensino Médio Regular 91.7 \n", + "\n", + "tipo_metrica taxa_reprovacao taxa_abandono \n", + "0 16.2 5.1 \n", + "1 23.8 4.6 \n", + "2 13.0 7.4 \n", + "3 16.2 5.0 \n", + "4 23.1 4.5 \n", + "5 12.9 7.8 \n", + "6 14.2 4.3 \n", + "7 22.3 3.3 \n", + "8 13.0 7.0 \n", + "9 14.3 4.6 \n", + "10 23.0 3.3 \n", + "11 13.2 7.4 \n", + "12 15.0 4.6 \n", + "13 22.2 3.3 \n", + "14 13.5 7.1 \n", + "15 15.1 4.7 \n", + "16 23.6 3.3 \n", + "17 12.8 7.1 \n", + "18 13.9 4.4 \n", + "19 21.9 2.9 \n", + "20 11.5 6.8 \n", + "21 14.7 4.4 \n", + "22 23.4 2.9 \n", + "23 11.9 6.1 \n", + "24 14.7 4.3 \n", + "25 22.8 2.7 \n", + "26 11.5 6.2 \n", + "27 15.1 4.4 \n", + "28 22.7 2.7 \n", + "29 11.8 5.7 \n", + "30 13.5 4.1 \n", + "31 20.9 2.4 \n", + "32 11.2 6.0 \n", + "33 13.0 3.8 \n", + "34 19.6 2.2 \n", + "35 11.4 6.1 \n", + "36 10.9 2.9 \n", + "37 16.9 1.7 \n", + "38 9.4 4.5 \n", + "39 1.4 1.6 \n", + "40 8.0 1.2 \n", + "41 2.2 2.6 \n", + "42 3.4 2.5 \n", + "43 10.7 1.4 \n", + "44 3.8 4.5 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_brasil_taxa_rendimento\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "df_final.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb new file mode 100644 index 00000000..a25c0d30 --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_docente_aee.ipynb @@ -0,0 +1,630 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 2) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"2020_2022_INDIC_ED_ESP.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name,\n", + " dtype=str\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sheets_etapa_ensino_serie = {\n", + " \"INDICADOR 2\":\"regente_aee\",\n", + " \"INDICADOR 3\":\"aee\",\n", + " \"INDICADOR 4\":\"regente_formacaocontinuada\",\n", + " \"INDICADOR 5\":\"aee_formacaocontinuada\", \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " name: read_sheet(sheet_name)\n", + " for sheet_name, name in sheets_etapa_ensino_serie.items()\n", + "}" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = {}\n", + "\n", + "for table_name, columns in dfs.items():\n", + " df = pd.DataFrame(columns) # Create DataFrame for each table\n", + " dataframes[table_name] = df # Store the DataFrame in a dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['NU_ANO_CENSO', 'CO_REGIAO', 'NO_REGIAO', 'CO_UF', 'NO_UF', 'SIGLA',\n", + " 'NO_MUNICIPIO', 'CO_MUNICIPIO', 'DOCENTES'],\n", + " dtype='object')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframes['aee'].columns" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "RENAMES_COLUMNS = {\n", + " \"regente_aee\": {\n", + " 'NU_ANO_CENSO':'ano', \n", + " #'CO_REGIAO',\n", + " #'NO_REGIAO', \n", + " #'CO_UF', \n", + " #'NO_UF', \n", + " 'SIGLA':\"sigla_uf\",\n", + " #'NO_MUNICIPIO', \n", + " 'CO_MUNICIPIO':\"id_municipio\", \n", + " 'DOCENTES':'quantidade_docente_regente_aee'\n", + " },\n", + " \"aee\": {\n", + " 'NU_ANO_CENSO':'ano', \n", + " #'CO_REGIAO',\n", + " #'NO_REGIAO', \n", + " #'CO_UF', \n", + " #'NO_UF', \n", + " 'SIGLA':\"sigla_uf\",\n", + " #'NO_MUNICIPIO', \n", + " 'CO_MUNICIPIO':\"id_municipio\", \n", + " 'DOCENTES':'quantidade_docente_aee'\n", + " },\n", + " \"regente_formacaocontinuada\": {\n", + " 'NU_ANO_CENSO':'ano', \n", + " #'CO_REGIAO',\n", + " #'NO_REGIAO', \n", + " #'CO_UF', \n", + " #'NO_UF', \n", + " 'SIGLA':\"sigla_uf\",\n", + " #'NO_MUNICIPIO', \n", + " 'CO_MUNICIPIO':\"id_municipio\", \n", + " 'DOCENTES':'quantidade_docente_regente_formacao_continuada'\n", + " },\n", + " \"aee_formacaocontinuada\": {\n", + " 'NU_ANO_CENSO':'ano', \n", + " #'CO_REGIAO',\n", + " #'NO_REGIAO', \n", + " #'CO_UF', \n", + " #'NO_UF', \n", + " 'SIGLA':\"sigla_uf\",\n", + " #'NO_MUNICIPIO', 
\n", + " 'CO_MUNICIPIO':\"id_municipio\", \n", + " 'DOCENTES':'quantidade_docente_aee_formacao_continuada'\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"CO_\")\n", + " or col.startswith(\"NO_\")\n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAMES_COLUMNS[name], errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: regente_aee\n", + "Index(['ano', 'sigla_uf', 'id_municipio', 'quantidade_docente_regente_aee'], dtype='object')\n", + "\n", + "Sheet: aee\n", + "Index(['ano', 'sigla_uf', 'id_municipio', 'quantidade_docente_aee'], dtype='object')\n", + "\n", + "Sheet: regente_formacaocontinuada\n", + "Index(['ano', 'sigla_uf', 'id_municipio',\n", + " 'quantidade_docente_regente_formacao_continuada'],\n", + " dtype='object')\n", + "\n", + "Sheet: aee_formacaocontinuada\n", + "Index(['ano', 'sigla_uf', 'id_municipio',\n", + " 'quantidade_docente_aee_formacao_continuada'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def remove_nulls(df: pd.DataFrame, name: str) -> pd.DataFrame:\n", + " # Ensure you're working with the whole DataFrame\n", + " df = df.dropna(subset=['id_municipio']) # Remove rows where 'id_municipio' is NaN\n", + " df = 
df.loc[df['id_municipio'].astype(str) != \" \"] # Filter out rows where 'id_municipio' is empty space\n", + " return df\n", + "\n", + "# Apply remove_nulls to each DataFrame in the dictionary\n", + "dfs = {\n", + " name: remove_nulls(df, name) # This keeps the original key (sheet name) with the cleaned DataFrame\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'regente_aee': ano sigla_uf id_municipio quantidade_docente_regente_aee\n", + " 3 2020 RO 1100015 291\n", + " 4 2020 RO 1100379 128\n", + " 5 2020 RO 1100403 135\n", + " 6 2020 RO 1100346 189\n", + " 7 2020 RO 1100023 1018\n", + " ... ... ... ... ...\n", + " 16803 2022 GO 5222302 78\n", + " 16804 2022 GO 5200175 58\n", + " 16805 2022 GO 5200209 31\n", + " 16806 2022 GO 5200258 1818\n", + " 16808 2022 DF 5300108 30821\n", + " \n", + " [16710 rows x 4 columns],\n", + " 'aee': ano sigla_uf id_municipio quantidade_docente_aee\n", + " 3 2020 RO 1100015 10\n", + " 4 2020 RO 1100379 3\n", + " 5 2020 RO 1100403 4\n", + " 6 2020 RO 1100346 11\n", + " 7 2020 RO 1100023 55\n", + " ... ... ... ... ...\n", + " 13851 2022 GO 5222203 3\n", + " 13852 2022 GO 5222302 3\n", + " 13853 2022 GO 5200209 2\n", + " 13854 2022 GO 5200258 58\n", + " 13856 2022 DF 5300108 673\n", + " \n", + " [13758 rows x 4 columns],\n", + " 'regente_formacaocontinuada': ano sigla_uf id_municipio \\\n", + " 3 2020 RO 1100015 \n", + " 4 2020 RO 1100379 \n", + " 5 2020 RO 1100403 \n", + " 6 2020 RO 1100346 \n", + " 7 2020 RO 1100023 \n", + " ... ... ... ... \n", + " 15133 2022 GO 5222302 \n", + " 15134 2022 GO 5200175 \n", + " 15135 2022 GO 5200209 \n", + " 15136 2022 GO 5200258 \n", + " 15138 2022 DF 5300108 \n", + " \n", + " quantidade_docente_regente_formacao_continuada \n", + " 3 14 \n", + " 4 1 \n", + " 5 2 \n", + " 6 9 \n", + " 7 34 \n", + " ... ... 
\n", + " 15133 2 \n", + " 15134 3 \n", + " 15135 6 \n", + " 15136 310 \n", + " 15138 4051 \n", + " \n", + " [15040 rows x 4 columns],\n", + " 'aee_formacaocontinuada': ano sigla_uf id_municipio quantidade_docente_aee_formacao_continuada\n", + " 3 2020 RO 1100015 5\n", + " 4 2020 RO 1100346 1\n", + " 5 2020 RO 1100023 15\n", + " 6 2020 RO 1100452 7\n", + " 7 2020 RO 1100031 3\n", + " ... ... ... ... ...\n", + " 9899 2022 GO 5221858 19\n", + " 9900 2022 GO 5222203 1\n", + " 9901 2022 GO 5200209 1\n", + " 9902 2022 GO 5200258 39\n", + " 9904 2022 DF 5300108 471\n", + " \n", + " [9806 rows x 4 columns]}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Assuming 'dfs' is your dictionary with four tables\n", + "df_regente_aee = pd.DataFrame(dfs['regente_aee'])\n", + "df_aee = pd.DataFrame(dfs['aee'])\n", + "df_regente_formacaocontinuada = pd.DataFrame(dfs['regente_formacaocontinuada']) # Example third table\n", + "df_aee_formacaocontinuada = pd.DataFrame(dfs['aee_formacaocontinuada']) # Example fourth table\n", + "\n", + "# Merge all four DataFrames with outer join on 'ano', 'sigla_uf', 'id_municipio'\n", + "merged_df = df_regente_aee.merge(df_aee, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n", + "merged_df = merged_df.merge(df_regente_formacaocontinuada, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n", + "merged_df = merged_df.merge(df_aee_formacaocontinuada, on=['ano', 'sigla_uf', 'id_municipio'], how='outer')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "merged_df['quantidade_docente_regente_aee'] = pd.to_numeric(merged_df['quantidade_docente_regente_aee'], errors='coerce')\n", + "merged_df['quantidade_docente_aee'] = pd.to_numeric(merged_df['quantidade_docente_aee'], 
errors='coerce')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "merged_df['quantidade_docente_regente'] = merged_df['quantidade_docente_regente_aee'] - merged_df['quantidade_docente_aee']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "merged_df = merged_df.drop(columns='quantidade_docente_regente_aee', axis='')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "merged_df = merged_df[[\n", + " 'ano', \n", + " 'sigla_uf', \n", + " 'id_municipio',\n", + " 'quantidade_docente_regente',\n", + " 'quantidade_docente_aee',\n", + " 'quantidade_docente_regente_formacao_continuada',\n", + " 'quantidade_docente_aee_formacao_continuada']]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anosigla_ufid_municipioquantidade_docente_regentequantidade_docente_aeequantidade_docente_regente_formacao_continuadaquantidade_docente_aee_formacao_continuada
02020RO1100015281.010.0145
12020RO1100379125.03.01NaN
22020RO1100403131.04.02NaN
32020RO1100346178.011.091
42020RO1100023963.055.03415
........................
167052022GO522230275.03.02NaN
167062022GO5200175NaNNaN3NaN
167072022GO520020929.02.061
167082022GO52002581760.058.031039
167092022DF530010830148.0673.04051471
\n", + "

16710 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " ano sigla_uf id_municipio quantidade_docente_regente \\\n", + "0 2020 RO 1100015 281.0 \n", + "1 2020 RO 1100379 125.0 \n", + "2 2020 RO 1100403 131.0 \n", + "3 2020 RO 1100346 178.0 \n", + "4 2020 RO 1100023 963.0 \n", + "... ... ... ... ... \n", + "16705 2022 GO 5222302 75.0 \n", + "16706 2022 GO 5200175 NaN \n", + "16707 2022 GO 5200209 29.0 \n", + "16708 2022 GO 5200258 1760.0 \n", + "16709 2022 DF 5300108 30148.0 \n", + "\n", + " quantidade_docente_aee quantidade_docente_regente_formacao_continuada \\\n", + "0 10.0 14 \n", + "1 3.0 1 \n", + "2 4.0 2 \n", + "3 11.0 9 \n", + "4 55.0 34 \n", + "... ... ... \n", + "16705 3.0 2 \n", + "16706 NaN 3 \n", + "16707 2.0 6 \n", + "16708 58.0 310 \n", + "16709 673.0 4051 \n", + "\n", + " quantidade_docente_aee_formacao_continuada \n", + "0 5 \n", + "1 NaN \n", + "2 NaN \n", + "3 1 \n", + "4 15 \n", + "... ... \n", + "16705 NaN \n", + "16706 NaN \n", + "16707 1 \n", + "16708 39 \n", + "16709 471 \n", + "\n", + "[16710 rows x 7 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_aee_docente\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "merged_df.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + 
"nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb new file mode 100644 index 00000000..6a47fc0b --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_docente_formacao.ipynb @@ -0,0 +1,460 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 9) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"Demanda_23546-049990_2024_06_DOC_EDU_ESPECIAL_BAS__2012_A_2023.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name,\n", + " dtype=str\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"Demanda_23546-049990_2024_06_DOC_EDU_ESPECIAL_BAS__2012_A_2023.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Planilha1': NU_ANO_CENSO CO_REGIAO NO_REGIAO \\\n", + " 0 2012 NaN Brasil \n", + " 1 2012 1 Norte \n", + " 2 2012 1 
Norte \n", + " 3 2012 1 Norte \n", + " 4 2012 1 Norte \n", + " ... ... ... ... \n", + " 57988 NaN NaN NaN \n", + " 57989 Fonte: INEP – Censo Escolar da Educação Básica NaN NaN \n", + " 57990 Notas: 1 - Os docentes referem-se aos indivíd... NaN NaN \n", + " 57991 2 - Os docentes são contados u... NaN NaN \n", + " 57992 3 - Não inclui auxiliares da E... NaN NaN \n", + " \n", + " CO_UF SG_UF NO_UF CO_MUNICIPIO NO_MUNICIPIO DOCEE DOCFED \\\n", + " 0 NaN NaN NaN NaN NaN 88244 255 \n", + " 1 NaN NaN NaN NaN NaN 5954 41 \n", + " 2 11 RO Rondônia NaN NaN 593 8 \n", + " 3 11 RO Rondônia 1100015 Alta Floresta D'Oeste 12 0 \n", + " 4 11 RO Rondônia 1100403 Alto Paraíso 2 0 \n", + " ... ... ... ... ... ... ... ... \n", + " 57988 NaN NaN NaN NaN NaN NaN NaN \n", + " 57989 NaN NaN NaN NaN NaN NaN NaN \n", + " 57990 NaN NaN NaN NaN NaN NaN NaN \n", + " 57991 NaN NaN NaN NaN NaN NaN NaN \n", + " 57992 NaN NaN NaN NaN NaN NaN NaN \n", + " \n", + " DOCEST DOCMUNI DOCPRIV \n", + " 0 23762 52541 21635 \n", + " 1 2328 3490 739 \n", + " 2 244 243 161 \n", + " 3 2 9 1 \n", + " 4 2 0 0 \n", + " ... ... ... ... 
\n", + " 57988 NaN NaN NaN \n", + " 57989 NaN NaN NaN \n", + " 57990 NaN NaN NaN \n", + " 57991 NaN NaN NaN \n", + " 57992 NaN NaN NaN \n", + " \n", + " [57993 rows x 13 columns]}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: Planilha1\n", + "Index(['NU_ANO_CENSO', 'CO_REGIAO', 'NO_REGIAO', 'CO_UF', 'SG_UF', 'NO_UF',\n", + " 'CO_MUNICIPIO', 'NO_MUNICIPIO', 'DOCEE', 'DOCFED', 'DOCEST', 'DOCMUNI',\n", + " 'DOCPRIV'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'NU_ANO_CENSO':'ano', \n", + " 'SG_UF':'sigla_uf', \n", + " 'CO_MUNICIPIO':'id_municipio', \n", + " 'DOCFED':'Federal', \n", + " 'DOCEST':'Estadual', \n", + " 'DOCMUNI':'Municipal',\n", + " 'DOCPRIV':'Privada' \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"NO_\") \n", + " or col.startswith(\"CO_\")\n", + " or col.startswith('DOCEE') \n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Sheet: Planilha1\n", + "Index(['ano', 'sigla_uf', 'id_municipio', 'Federal', 'Estadual', 'Municipal',\n", + " 'Privada'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = pd.concat(\n", + " [\n", + " df.pipe(\n", + " lambda d: d.loc[(d[\"id_municipio\"].notna()) & (d[\"id_municipio\"] != \" \")]\n", + " )\n", + " .pipe(\n", + " lambda d: pd.melt(\n", + " d,\n", + " id_vars=[\"ano\", \"sigla_uf\", 'id_municipio'],\n", + " value_vars=d.columns.difference([\"id_uf\", \"nome\"]).tolist(), # Convert to list\n", + " var_name=\"rede\",\n", + " value_name=\"quantidade_docente_formacao_continuada\",\n", + " )\n", + " )\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = melted_dataframe.sort_values(\n", + " by=['ano', 'sigla_uf','id_municipio', 'rede'], \n", + " ascending=[True, True, True, True])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anosigla_ufid_municipioredequantidade_docente_formacao_continuada
452012AC1200013Estadual2
576382012AC1200013Federal0
1152312012AC1200013Municipal12
1728242012AC1200013Privada0
462012AC1200104Estadual3
..................
2256132023TO1721307Privada0
528352023TO1722107Estadual0
1104282023TO1722107Federal0
1680212023TO1722107Municipal1
2256142023TO1722107Privada0
\n", + "

230372 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " ano sigla_uf id_municipio rede \\\n", + "45 2012 AC 1200013 Estadual \n", + "57638 2012 AC 1200013 Federal \n", + "115231 2012 AC 1200013 Municipal \n", + "172824 2012 AC 1200013 Privada \n", + "46 2012 AC 1200104 Estadual \n", + "... ... ... ... ... \n", + "225613 2023 TO 1721307 Privada \n", + "52835 2023 TO 1722107 Estadual \n", + "110428 2023 TO 1722107 Federal \n", + "168021 2023 TO 1722107 Municipal \n", + "225614 2023 TO 1722107 Privada \n", + "\n", + " quantidade_docente_formacao_continuada \n", + "45 2 \n", + "57638 0 \n", + "115231 12 \n", + "172824 0 \n", + "46 3 \n", + "... ... \n", + "225613 0 \n", + "52835 0 \n", + "110428 0 \n", + "168021 1 \n", + "225614 0 \n", + "\n", + "[230372 rows x 5 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_formacao_docente\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "melted_dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb new file mode 100644 index 00000000..34abfecc --- /dev/null +++ 
b/models/br_inep_educacao_especial/code/educacao_especial_matricula_aee.ipynb @@ -0,0 +1,458 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 7) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"mat_ed_especial_aee_uf_rede_2019_2022.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"mat_ed_especial_aee_uf_rede_2019_2022.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tabmat': NU_ANO_CENSO NO_REGIAO CO_REGIAO \\\n", + " 0 2022 Brasil NaN \n", + " 1 2022 Brasil NaN \n", + " 2 2022 Brasil NaN \n", + " 3 2022 Norte 1.0 \n", + " 4 2022 Norte 1.0 \n", + " .. ... ... ... \n", + " 333 2019 Centro-Oeste 5.0 \n", + " 334 2019 Centro-Oeste 5.0 \n", + " 335 2019 Centro-Oeste 5.0 \n", + " 336 NaN NaN NaN \n", + " 337 Fonte: Censo Escolar da Educação Básica/Inep. 
NaN NaN \n", + " \n", + " NO_UF SG_UF CO_UF rede MatEsp MatEspAee \n", + " 0 NaN NaN NaN NaN 1527794.0 568200.0 \n", + " 1 NaN NaN NaN Pública 1301961.0 525868.0 \n", + " 2 NaN NaN NaN Privada 225833.0 42332.0 \n", + " 3 Rondônia RO 11.0 NaN 14341.0 8328.0 \n", + " 4 Rondônia RO 11.0 Pública 12803.0 6965.0 \n", + " .. ... ... ... ... ... ... \n", + " 333 Distrito Federal DF 53.0 NaN 16580.0 8967.0 \n", + " 334 Distrito Federal DF 53.0 Pública 14362.0 8702.0 \n", + " 335 Distrito Federal DF 53.0 Privada 2218.0 265.0 \n", + " 336 NaN NaN NaN NaN NaN NaN \n", + " 337 NaN NaN NaN NaN NaN NaN \n", + " \n", + " [338 rows x 9 columns]}" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: tabmat\n", + "Index(['NU_ANO_CENSO', 'NO_REGIAO', 'CO_REGIAO', 'NO_UF', 'SG_UF', 'CO_UF',\n", + " 'rede', 'MatEsp', 'MatEspAee'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'NU_ANO_CENSO':'ano',\n", + " 'SG_UF':'sigla_uf', \n", + " 'MatEsp':'quantidade_matricula', \n", + " 'MatEspAee':'quantidade_matricula_aee'}" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"NO_\") \n", + " or col.startswith(\"CO\") \n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", 
+ " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'tabmat': ano sigla_uf rede \\\n", + " 0 2022 NaN NaN \n", + " 1 2022 NaN Pública \n", + " 2 2022 NaN Privada \n", + " 3 2022 RO NaN \n", + " 4 2022 RO Pública \n", + " .. ... ... ... \n", + " 333 2019 DF NaN \n", + " 334 2019 DF Pública \n", + " 335 2019 DF Privada \n", + " 336 NaN NaN NaN \n", + " 337 Fonte: Censo Escolar da Educação Básica/Inep. NaN NaN \n", + " \n", + " quantidade_matricula quantidade_matricula_aee \n", + " 0 1527794.0 568200.0 \n", + " 1 1301961.0 525868.0 \n", + " 2 225833.0 42332.0 \n", + " 3 14341.0 8328.0 \n", + " 4 12803.0 6965.0 \n", + " .. ... ... \n", + " 333 16580.0 8967.0 \n", + " 334 14362.0 8702.0 \n", + " 335 2218.0 265.0 \n", + " 336 NaN NaN \n", + " 337 NaN NaN \n", + " \n", + " [338 rows x 5 columns]}" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: tabmat\n", + "Index(['ano', 'sigla_uf', 'rede', 'quantidade_matricula',\n", + " 'quantidade_matricula_aee'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe = pd.DataFrame(dfs['tabmat']).dropna(subset=['sigla_uf', 'rede']).loc[lambda df: (df['sigla_uf'] != \" \") & (df['rede'] != \" \")]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + 
"outputs": [], + "source": [ + "dataframe[['quantidade_matricula','quantidade_matricula_aee']] = dataframe[['quantidade_matricula','quantidade_matricula_aee']].astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe = dataframe.sort_values(by=['ano', 'sigla_uf'], ascending=[True, True])" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anosigla_ufredequantidade_matriculaquantidade_matricula_aee
2592019ACPública107826395
2602019ACPrivada325261
2952019ALPública2183810145
2962019ALPrivada13141832
2622019AMPública172165074
..................
472022SEPrivada1604227
612022SPPública19170868563
622022SPPrivada52146942
222022TOPública159777222
232022TOPrivada56728
\n", + "

216 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " ano sigla_uf rede quantidade_matricula quantidade_matricula_aee\n", + "259 2019 AC Pública 10782 6395\n", + "260 2019 AC Privada 325 261\n", + "295 2019 AL Pública 21838 10145\n", + "296 2019 AL Privada 1314 1832\n", + "262 2019 AM Pública 17216 5074\n", + ".. ... ... ... ... ...\n", + "47 2022 SE Privada 1604 227\n", + "61 2022 SP Pública 191708 68563\n", + "62 2022 SP Privada 52146 942\n", + "22 2022 TO Pública 15977 7222\n", + "23 2022 TO Privada 567 28\n", + "\n", + "[216 rows x 5 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_aee_matricula\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb new file mode 100644 index 00000000..2a9c4ba5 --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_uf_distorcao_idade_serie.ipynb @@ -0,0 +1,4066 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + 
"INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 3) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"TDI_-_Alunos_com_defici_ncia.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'2007': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 72.2 60.6 \n", + " 1 Norte 76.1 69.7 \n", + " 2 Nordeste 74.4 64.7 \n", + " 3 Sudeste 69.3 56.5 \n", + " 4 Sul 73.3 59.6 \n", + " 5 Centro-Oeste 78.7 73.6 \n", + " 6 Rondônia 72.8 65.8 \n", + " 7 Acre 67.2 60.0 \n", + " 8 Amazonas 76.3 75.7 \n", + " 9 Roraima 71.0 69.5 \n", + " 10 Pará 78.2 68.4 \n", + " 11 Amapá 76.4 81.0 \n", + " 12 Tocantins 75.4 70.7 \n", + " 13 Maranhão 74.2 61.2 \n", + " 14 Piauí 83.0 69.3 \n", + " 15 Ceará 67.8 56.9 \n", + " 16 Rio Grande do Norte 66.4 74.0 \n", + " 17 Paraíba 73.3 71.6 \n", + " 18 Pernambuco 78.6 64.5 \n", + " 19 Alagoas 72.0 79.2 \n", + " 20 Sergipe 85.0 85.4 \n", + " 21 Bahia 76.7 66.5 \n", + " 22 Minas Gerais 79.0 69.1 \n", + " 23 Espírito Santo 67.5 69.0 \n", + " 24 Rio de Janeiro 85.1 77.2 \n", + " 25 São Paulo 60.7 50.2 \n", + " 26 Paraná 72.3 54.2 \n", + " 27 Santa Catarina 75.8 65.3 \n", + " 28 Rio Grande do Sul 73.6 66.1 \n", + " 29 Mato Grosso do Sul 86.0 81.2 \n", + " 30 Mato Grosso 76.3 66.2 \n", + " 31 Goiás 76.8 75.7 \n", + " 32 Distrito Federal 74.6 70.6 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 65.3 \n", + " 1 75.7 \n", + " 2 77.9 \n", + " 3 58.4 \n", + " 4 62.6 \n", + " 5 73.6 \n", + " 6 76.2 \n", + " 7 60.5 \n", + " 8 75.5 \n", + " 9 76.9 \n", + " 10 77.7 \n", + " 11 69.4 \n", + " 12 75.9 \n", + " 13 76.2 \n", + " 14 80.5 \n", + " 15 83.7 \n", + " 16 78.0 \n", + " 17 88.0 \n", + " 18 81.3 \n", + " 19 88.3 \n", + " 20 85.7 \n", + " 21 67.1 \n", + " 22 69.1 \n", + " 23 69.1 \n", + " 24 86.7 \n", + " 25 51.5 \n", + " 26 59.5 \n", + " 27 71.9 \n", + " 28 60.4 \n", + " 29 75.4 \n", + " 30 78.1 \n", + " 31 75.5 \n", + " 32 69.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2008': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 66.5 54.5 \n", + " 1 Norte 61.2 44.8 \n", + " 2 Nordeste 60.7 45.7 \n", + " 3 Sudeste 65.7 53.5 \n", + " 4 Sul 73.5 63.2 \n", + " 5 Centro-Oeste 72.3 63.6 \n", + " 6 Rondônia 55.1 35.9 \n", + " 7 Acre 43.8 29.7 \n", + " 8 Amazonas 58.0 57.0 \n", + " 9 Roraima 44.2 28.4 \n", + " 10 Pará 65.1 48.3 \n", + " 11 Amapá 57.0 54.4 \n", + " 12 Tocantins 67.1 46.3 \n", + " 13 Maranhão 62.8 44.3 \n", + " 14 Piauí 66.3 57.9 \n", + " 15 Ceará 55.9 38.6 \n", + " 16 Rio Grande do Norte 47.1 45.2 \n", + " 17 Paraíba 54.0 48.1 \n", + " 18 Pernambuco 73.5 58.6 \n", + " 19 Alagoas 49.0 46.1 \n", + " 20 Sergipe 68.5 58.9 \n", + " 21 Bahia 60.5 44.4 \n", + " 22 Minas Gerais 73.1 61.8 \n", + " 23 Espírito Santo 62.9 61.2 \n", + " 24 Rio de Janeiro 82.7 72.7 \n", + " 25 São Paulo 57.5 48.2 \n", + " 26 Paraná 78.0 60.6 \n", + " 27 Santa Catarina 69.5 64.5 \n", + " 28 Rio Grande do Sul 70.1 65.0 \n", + " 29 Mato Grosso do Sul 84.6 75.7 \n", + " 30 Mato Grosso 72.6 66.2 \n", + " 31 Goiás 68.0 62.1 \n", + " 32 Distrito Federal 62.0 60.0 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 57.3 \n", + " 1 50.7 \n", + " 2 49.4 \n", + " 3 56.6 \n", + " 4 65.0 \n", + " 5 60.5 \n", + " 6 48.8 \n", + " 7 47.2 \n", + " 8 62.2 \n", + " 9 41.2 \n", + " 10 48.7 \n", + " 11 60.0 \n", + " 12 48.2 \n", + " 13 45.3 \n", + " 14 51.0 \n", + " 
15 48.7 \n", + " 16 44.2 \n", + " 17 52.3 \n", + " 18 65.1 \n", + " 19 61.1 \n", + " 20 65.7 \n", + " 21 41.9 \n", + " 22 58.6 \n", + " 23 73.5 \n", + " 24 80.2 \n", + " 25 51.5 \n", + " 26 59.6 \n", + " 27 72.5 \n", + " 28 64.0 \n", + " 29 80.0 \n", + " 30 72.7 \n", + " 31 55.8 \n", + " 32 60.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2009': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 68.4 54.8 \n", + " 1 Norte 69.8 51.0 \n", + " 2 Nordeste 66.9 50.7 \n", + " 3 Sudeste 65.6 52.1 \n", + " 4 Sul 75.8 63.8 \n", + " 5 Centro-Oeste 73.1 65.6 \n", + " 6 Rondônia 64.2 47.3 \n", + " 7 Acre 51.6 37.8 \n", + " 8 Amazonas 70.6 56.7 \n", + " 9 Roraima 50.6 51.7 \n", + " 10 Pará 74.2 54.0 \n", + " 11 Amapá 64.5 49.2 \n", + " 12 Tocantins 72.8 50.5 \n", + " 13 Maranhão 64.4 48.8 \n", + " 14 Piauí 70.6 47.2 \n", + " 15 Ceará 64.3 44.5 \n", + " 16 Rio Grande do Norte 53.4 57.9 \n", + " 17 Paraíba 70.4 47.9 \n", + " 18 Pernambuco 78.3 64.5 \n", + " 19 Alagoas 54.1 58.9 \n", + " 20 Sergipe 78.2 61.1 \n", + " 21 Bahia 65.5 50.2 \n", + " 22 Minas Gerais 75.4 64.4 \n", + " 23 Espírito Santo 69.9 60.2 \n", + " 24 Rio de Janeiro 84.6 74.3 \n", + " 25 São Paulo 57.0 47.3 \n", + " 26 Paraná 80.6 61.1 \n", + " 27 Santa Catarina 57.4 61.8 \n", + " 28 Rio Grande do Sul 75.0 67.9 \n", + " 29 Mato Grosso do Sul 84.1 78.9 \n", + " 30 Mato Grosso 73.2 69.7 \n", + " 31 Goiás 65.7 61.2 \n", + " 32 Distrito Federal 66.4 64.6 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.0 \n", + " 1 57.5 \n", + " 2 56.6 \n", + " 3 52.1 \n", + " 4 62.1 \n", + " 5 62.2 \n", + " 6 45.1 \n", + " 7 43.4 \n", + " 8 61.4 \n", + " 9 21.4 \n", + " 10 62.1 \n", + " 11 54.7 \n", + " 12 55.2 \n", + " 13 48.2 \n", + " 14 61.4 \n", + " 15 52.6 \n", + " 16 51.1 \n", + " 17 50.2 \n", + " 18 69.3 \n", + " 19 64.1 \n", + " 20 68.9 \n", + " 21 54.4 \n", + " 22 59.9 \n", + " 23 69.9 \n", + " 24 74.8 \n", + " 25 46.0 \n", + " 26 57.4 \n", + " 27 57.4 \n", + " 28 70.3 \n", + " 29 80.6 \n", + " 30 70.9 \n", + " 31 56.2 \n", + " 32 67.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2010': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 67.5 57.2 \n", + " 1 Norte 70.0 59.4 \n", + " 2 Nordeste 67.1 57.8 \n", + " 3 Sudeste 65.0 52.8 \n", + " 4 Sul 72.4 62.8 \n", + " 5 Centro-Oeste 68.6 67.2 \n", + " 6 Rondônia 67.5 60.7 \n", + " 7 Acre 57.2 49.4 \n", + " 8 Amazonas 70.7 64.2 \n", + " 9 Roraima 50.6 59.2 \n", + " 10 Pará 74.9 62.5 \n", + " 11 Amapá 66.7 54.7 \n", + " 12 Tocantins 68.6 57.5 \n", + " 13 Maranhão 65.2 55.5 \n", + " 14 Piauí 69.2 56.1 \n", + " 15 Ceará 64.2 50.7 \n", + " 16 Rio Grande do Norte 56.2 66.8 \n", + " 17 Paraíba 71.7 56.5 \n", + " 18 Pernambuco 74.9 65.6 \n", + " 19 Alagoas 57.6 60.2 \n", + " 20 Sergipe 78.2 72.2 \n", + " 21 Bahia 67.4 59.3 \n", + " 22 Minas Gerais 69.3 66.1 \n", + " 23 Espírito Santo 59.1 59.0 \n", + " 24 Rio de Janeiro 82.2 73.3 \n", + " 25 São Paulo 58.4 47.4 \n", + " 26 Paraná 76.0 59.0 \n", + " 27 Santa Catarina 54.1 62.1 \n", + " 28 Rio Grande do Sul 73.0 67.8 \n", + " 29 Mato Grosso do Sul 81.2 78.7 \n", + " 30 Mato Grosso 64.6 66.7 \n", + " 31 Goiás 62.2 66.2 \n", + " 32 Distrito Federal 62.7 61.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.4 \n", + " 1 63.0 \n", + " 2 60.3 \n", + " 3 52.1 \n", + " 4 55.6 \n", + " 5 64.6 \n", + " 6 45.9 \n", + " 7 56.6 \n", + " 8 70.9 \n", + " 9 58.6 \n", + " 10 70.8 \n", + " 11 54.1 \n", + " 12 56.0 \n", + " 13 56.7 \n", + " 14 65.9 \n", + " 
15 50.6 \n", + " 16 67.4 \n", + " 17 55.6 \n", + " 18 69.9 \n", + " 19 69.1 \n", + " 20 80.1 \n", + " 21 57.6 \n", + " 22 58.9 \n", + " 23 60.1 \n", + " 24 67.7 \n", + " 25 47.3 \n", + " 26 51.1 \n", + " 27 53.0 \n", + " 28 62.9 \n", + " 29 75.6 \n", + " 30 69.6 \n", + " 31 63.2 \n", + " 32 61.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2011': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 66.5 58.5 \n", + " 1 Norte 69.1 63.2 \n", + " 2 Nordeste 66.9 61.9 \n", + " 3 Sudeste 64.7 53.6 \n", + " 4 Sul 69.1 60.6 \n", + " 5 Centro-Oeste 66.1 66.2 \n", + " 6 Rondônia 67.5 61.4 \n", + " 7 Acre 57.1 53.4 \n", + " 8 Amazonas 69.7 69.5 \n", + " 9 Roraima 49.4 65.6 \n", + " 10 Pará 73.4 66.5 \n", + " 11 Amapá 69.8 55.4 \n", + " 12 Tocantins 67.2 62.8 \n", + " 13 Maranhão 64.8 60.2 \n", + " 14 Piauí 67.1 61.0 \n", + " 15 Ceará 62.1 56.8 \n", + " 16 Rio Grande do Norte 57.6 68.9 \n", + " 17 Paraíba 71.2 60.8 \n", + " 18 Pernambuco 74.3 67.4 \n", + " 19 Alagoas 60.4 64.5 \n", + " 20 Sergipe 78.4 73.3 \n", + " 21 Bahia 67.7 61.6 \n", + " 22 Minas Gerais 65.1 65.3 \n", + " 23 Espírito Santo 54.4 58.0 \n", + " 24 Rio de Janeiro 79.6 72.8 \n", + " 25 São Paulo 60.4 47.5 \n", + " 26 Paraná 73.5 56.1 \n", + " 27 Santa Catarina 50.3 60.1 \n", + " 28 Rio Grande do Sul 69.8 66.4 \n", + " 29 Mato Grosso do Sul 78.4 79.3 \n", + " 30 Mato Grosso 58.8 62.5 \n", + " 31 Goiás 60.5 65.1 \n", + " 32 Distrito Federal 63.0 61.0 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 56.1 \n", + " 1 62.9 \n", + " 2 59.9 \n", + " 3 52.5 \n", + " 4 54.3 \n", + " 5 62.5 \n", + " 6 51.0 \n", + " 7 51.2 \n", + " 8 68.9 \n", + " 9 48.1 \n", + " 10 69.1 \n", + " 11 57.7 \n", + " 12 62.9 \n", + " 13 54.6 \n", + " 14 63.0 \n", + " 15 57.7 \n", + " 16 68.8 \n", + " 17 50.5 \n", + " 18 65.9 \n", + " 19 62.8 \n", + " 20 73.5 \n", + " 21 57.1 \n", + " 22 59.8 \n", + " 23 59.1 \n", + " 24 66.3 \n", + " 25 47.4 \n", + " 26 50.7 \n", + " 27 51.3 \n", + " 28 61.2 \n", + " 29 68.9 \n", + " 30 64.6 \n", + " 31 63.8 \n", + " 32 57.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2012': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 64.4 58.9 \n", + " 1 Norte 67.0 64.7 \n", + " 2 Nordeste 66.2 63.7 \n", + " 3 Sudeste 62.0 53.1 \n", + " 4 Sul 67.1 61.4 \n", + " 5 Centro-Oeste 62.5 64.3 \n", + " 6 Rondônia 67.0 60.8 \n", + " 7 Acre 54.5 58.5 \n", + " 8 Amazonas 66.0 72.1 \n", + " 9 Roraima 43.5 62.5 \n", + " 10 Pará 70.2 67.7 \n", + " 11 Amapá 68.3 61.9 \n", + " 12 Tocantins 68.3 62.9 \n", + " 13 Maranhão 62.6 60.3 \n", + " 14 Piauí 68.5 61.3 \n", + " 15 Ceará 57.8 59.4 \n", + " 16 Rio Grande do Norte 57.6 69.9 \n", + " 17 Paraíba 69.3 61.9 \n", + " 18 Pernambuco 74.2 69.6 \n", + " 19 Alagoas 61.7 66.2 \n", + " 20 Sergipe 79.5 75.4 \n", + " 21 Bahia 68.0 64.0 \n", + " 22 Minas Gerais 57.8 63.3 \n", + " 23 Espírito Santo 50.4 59.4 \n", + " 24 Rio de Janeiro 78.8 71.8 \n", + " 25 São Paulo 59.6 46.6 \n", + " 26 Paraná 73.8 56.9 \n", + " 27 Santa Catarina 41.5 59.9 \n", + " 28 Rio Grande do Sul 67.0 67.6 \n", + " 29 Mato Grosso do Sul 77.2 78.6 \n", + " 30 Mato Grosso 47.1 57.6 \n", + " 31 Goiás 56.6 63.0 \n", + " 32 Distrito Federal 62.6 59.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 55.3 \n", + " 1 62.9 \n", + " 2 60.9 \n", + " 3 51.4 \n", + " 4 51.8 \n", + " 5 61.5 \n", + " 6 53.9 \n", + " 7 52.0 \n", + " 8 66.0 \n", + " 9 57.5 \n", + " 10 70.9 \n", + " 11 52.5 \n", + " 12 61.0 \n", + " 13 57.8 \n", + " 14 58.3 \n", + " 
15 59.0 \n", + " 16 67.7 \n", + " 17 52.1 \n", + " 18 67.1 \n", + " 19 63.5 \n", + " 20 73.7 \n", + " 21 59.8 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 60.5 \n", + " 25 46.6 \n", + " 26 46.9 \n", + " 27 49.9 \n", + " 28 59.4 \n", + " 29 69.8 \n", + " 30 57.5 \n", + " 31 64.7 \n", + " 32 53.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2013': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 61.8 59.3 \n", + " 1 Norte 65.1 65.2 \n", + " 2 Nordeste 63.8 66.2 \n", + " 3 Sudeste 59.5 52.2 \n", + " 4 Sul 63.0 63.8 \n", + " 5 Centro-Oeste 60.1 61.6 \n", + " 6 Rondônia 66.3 62.2 \n", + " 7 Acre 55.8 56.4 \n", + " 8 Amazonas 63.2 66.0 \n", + " 9 Roraima 38.4 58.2 \n", + " 10 Pará 69.1 69.9 \n", + " 11 Amapá 64.4 60.6 \n", + " 12 Tocantins 63.3 66.0 \n", + " 13 Maranhão 60.5 61.9 \n", + " 14 Piauí 66.0 66.3 \n", + " 15 Ceará 50.2 63.1 \n", + " 16 Rio Grande do Norte 56.4 72.8 \n", + " 17 Paraíba 68.0 67.2 \n", + " 18 Pernambuco 73.1 71.1 \n", + " 19 Alagoas 61.3 65.7 \n", + " 20 Sergipe 76.4 77.6 \n", + " 21 Bahia 66.8 65.8 \n", + " 22 Minas Gerais 52.4 60.5 \n", + " 23 Espírito Santo 47.8 61.8 \n", + " 24 Rio de Janeiro 77.1 74.3 \n", + " 25 São Paulo 58.4 44.8 \n", + " 26 Paraná 70.7 59.6 \n", + " 27 Santa Catarina 35.1 59.7 \n", + " 28 Rio Grande do Sul 62.8 70.3 \n", + " 29 Mato Grosso do Sul 76.4 75.2 \n", + " 30 Mato Grosso 41.9 47.7 \n", + " 31 Goiás 53.5 64.7 \n", + " 32 Distrito Federal 63.5 58.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 55.4 \n", + " 1 62.0 \n", + " 2 61.9 \n", + " 3 51.0 \n", + " 4 52.9 \n", + " 5 61.4 \n", + " 6 54.1 \n", + " 7 48.6 \n", + " 8 63.9 \n", + " 9 60.3 \n", + " 10 70.6 \n", + " 11 54.4 \n", + " 12 60.4 \n", + " 13 57.8 \n", + " 14 63.2 \n", + " 15 60.9 \n", + " 16 65.5 \n", + " 17 56.9 \n", + " 18 71.0 \n", + " 19 55.5 \n", + " 20 72.9 \n", + " 21 60.0 \n", + " 22 61.1 \n", + " 23 51.7 \n", + " 24 63.1 \n", + " 25 46.1 \n", + " 26 48.2 \n", + " 27 51.8 \n", + " 28 59.4 \n", + " 29 67.8 \n", + " 30 60.5 \n", + " 31 65.1 \n", + " 32 53.9 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2014': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 58.7 59.6 \n", + " 1 Norte 60.7 65.6 \n", + " 2 Nordeste 60.4 68.1 \n", + " 3 Sudeste 56.4 51.9 \n", + " 4 Sul 61.2 64.5 \n", + " 5 Centro-Oeste 56.0 58.6 \n", + " 6 Rondônia 57.7 63.7 \n", + " 7 Acre 53.6 55.5 \n", + " 8 Amazonas 61.6 67.2 \n", + " 9 Roraima 35.5 54.8 \n", + " 10 Pará 64.8 72.0 \n", + " 11 Amapá 60.4 60.3 \n", + " 12 Tocantins 56.9 65.2 \n", + " 13 Maranhão 59.0 63.6 \n", + " 14 Piauí 62.1 68.2 \n", + " 15 Ceará 42.5 63.9 \n", + " 16 Rio Grande do Norte 52.5 74.8 \n", + " 17 Paraíba 65.0 69.4 \n", + " 18 Pernambuco 71.2 72.7 \n", + " 19 Alagoas 60.1 69.4 \n", + " 20 Sergipe 71.9 81.3 \n", + " 21 Bahia 64.4 67.9 \n", + " 22 Minas Gerais 46.0 56.6 \n", + " 23 Espírito Santo 45.6 63.0 \n", + " 24 Rio de Janeiro 75.3 73.7 \n", + " 25 São Paulo 56.1 44.9 \n", + " 26 Paraná 73.2 61.2 \n", + " 27 Santa Catarina 30.0 57.5 \n", + " 28 Rio Grande do Sul 58.9 70.9 \n", + " 29 Mato Grosso do Sul 75.1 73.4 \n", + " 30 Mato Grosso 35.0 37.8 \n", + " 31 Goiás 46.7 62.0 \n", + " 32 Distrito Federal 62.9 61.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 54.5 \n", + " 1 58.9 \n", + " 2 64.7 \n", + " 3 49.3 \n", + " 4 52.4 \n", + " 5 61.0 \n", + " 6 52.9 \n", + " 7 49.4 \n", + " 8 67.0 \n", + " 9 64.4 \n", + " 10 63.8 \n", + " 11 57.8 \n", + " 12 60.8 \n", + " 13 61.1 \n", + " 14 63.6 \n", + " 
15 62.2 \n", + " 16 69.0 \n", + " 17 60.3 \n", + " 18 68.6 \n", + " 19 61.3 \n", + " 20 74.1 \n", + " 21 67.8 \n", + " 22 60.5 \n", + " 23 53.9 \n", + " 24 63.1 \n", + " 25 43.5 \n", + " 26 48.4 \n", + " 27 50.4 \n", + " 28 59.0 \n", + " 29 66.4 \n", + " 30 58.7 \n", + " 31 64.4 \n", + " 32 54.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2015': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 55.8 59.2 \n", + " 1 Norte 57.8 66.0 \n", + " 2 Nordeste 57.2 67.6 \n", + " 3 Sudeste 53.8 51.1 \n", + " 4 Sul 57.6 63.6 \n", + " 5 Centro-Oeste 53.3 56.2 \n", + " 6 Rondônia 53.0 62.4 \n", + " 7 Acre 50.5 54.1 \n", + " 8 Amazonas 59.2 65.1 \n", + " 9 Roraima 32.1 54.7 \n", + " 10 Pará 62.8 73.7 \n", + " 11 Amapá 55.7 61.7 \n", + " 12 Tocantins 53.1 64.9 \n", + " 13 Maranhão 55.3 63.7 \n", + " 14 Piauí 60.1 67.5 \n", + " 15 Ceará 36.3 60.4 \n", + " 16 Rio Grande do Norte 49.0 74.2 \n", + " 17 Paraíba 62.9 70.4 \n", + " 18 Pernambuco 69.3 73.1 \n", + " 19 Alagoas 58.1 69.9 \n", + " 20 Sergipe 68.2 81.0 \n", + " 21 Bahia 62.7 68.6 \n", + " 22 Minas Gerais 41.7 53.1 \n", + " 23 Espírito Santo 43.0 62.6 \n", + " 24 Rio de Janeiro 72.9 72.3 \n", + " 25 São Paulo 54.5 44.4 \n", + " 26 Paraná 69.7 61.2 \n", + " 27 Santa Catarina 26.5 54.1 \n", + " 28 Rio Grande do Sul 56.0 70.5 \n", + " 29 Mato Grosso do Sul 73.1 69.4 \n", + " 30 Mato Grosso 30.8 34.0 \n", + " 31 Goiás 42.7 60.6 \n", + " 32 Distrito Federal 62.1 59.7 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.7 \n", + " 1 58.4 \n", + " 2 64.4 \n", + " 3 48.3 \n", + " 4 52.0 \n", + " 5 58.3 \n", + " 6 53.3 \n", + " 7 49.6 \n", + " 8 68.1 \n", + " 9 54.5 \n", + " 10 65.6 \n", + " 11 55.5 \n", + " 12 58.9 \n", + " 13 61.0 \n", + " 14 65.6 \n", + " 15 62.6 \n", + " 16 69.1 \n", + " 17 60.4 \n", + " 18 66.0 \n", + " 19 57.8 \n", + " 20 73.7 \n", + " 21 67.6 \n", + " 22 60.9 \n", + " 23 54.2 \n", + " 24 62.0 \n", + " 25 41.7 \n", + " 26 48.0 \n", + " 27 50.1 \n", + " 28 58.0 \n", + " 29 67.2 \n", + " 30 52.3 \n", + " 31 61.6 \n", + " 32 52.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2016': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 53.5 58.2 \n", + " 1 Norte 55.7 65.6 \n", + " 2 Nordeste 54.3 66.9 \n", + " 3 Sudeste 51.4 49.7 \n", + " 4 Sul 56.9 61.2 \n", + " 5 Centro-Oeste 51.0 55.3 \n", + " 6 Rondônia 47.2 62.7 \n", + " 7 Acre 46.8 55.5 \n", + " 8 Amazonas 57.7 63.3 \n", + " 9 Roraima 28.9 52.9 \n", + " 10 Pará 61.3 73.2 \n", + " 11 Amapá 54.4 62.0 \n", + " 12 Tocantins 52.5 62.7 \n", + " 13 Maranhão 53.1 64.2 \n", + " 14 Piauí 57.3 67.5 \n", + " 15 Ceará 30.9 55.6 \n", + " 16 Rio Grande do Norte 45.4 72.9 \n", + " 17 Paraíba 60.3 71.4 \n", + " 18 Pernambuco 65.8 74.0 \n", + " 19 Alagoas 56.1 70.7 \n", + " 20 Sergipe 66.1 79.4 \n", + " 21 Bahia 61.4 69.9 \n", + " 22 Minas Gerais 39.3 50.9 \n", + " 23 Espírito Santo 40.7 59.8 \n", + " 24 Rio de Janeiro 70.1 70.2 \n", + " 25 São Paulo 51.9 43.3 \n", + " 26 Paraná 69.5 59.3 \n", + " 27 Santa Catarina 24.5 47.9 \n", + " 28 Rio Grande do Sul 54.9 68.4 \n", + " 29 Mato Grosso do Sul 71.9 67.3 \n", + " 30 Mato Grosso 30.0 31.1 \n", + " 31 Goiás 40.0 60.1 \n", + " 32 Distrito Federal 59.7 59.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.9 \n", + " 1 60.4 \n", + " 2 64.6 \n", + " 3 47.8 \n", + " 4 53.6 \n", + " 5 57.3 \n", + " 6 54.4 \n", + " 7 51.1 \n", + " 8 67.3 \n", + " 9 51.1 \n", + " 10 69.7 \n", + " 11 55.5 \n", + " 12 60.9 \n", + " 13 58.7 \n", + " 14 63.5 \n", + " 
15 64.1 \n", + " 16 68.3 \n", + " 17 65.5 \n", + " 18 65.4 \n", + " 19 64.6 \n", + " 20 72.0 \n", + " 21 68.0 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 58.3 \n", + " 25 41.2 \n", + " 26 49.1 \n", + " 27 50.7 \n", + " 28 61.0 \n", + " 29 68.0 \n", + " 30 49.8 \n", + " 31 61.6 \n", + " 32 50.8 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2017': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 51.4 56.8 \n", + " 1 Norte 53.1 64.8 \n", + " 2 Nordeste 52.2 65.3 \n", + " 3 Sudeste 49.4 48.3 \n", + " 4 Sul 54.6 59.2 \n", + " 5 Centro-Oeste 48.2 52.5 \n", + " 6 Rondônia 42.7 60.5 \n", + " 7 Acre 45.1 54.0 \n", + " 8 Amazonas 54.5 62.9 \n", + " 9 Roraima 26.2 50.3 \n", + " 10 Pará 59.9 72.7 \n", + " 11 Amapá 53.8 60.4 \n", + " 12 Tocantins 44.1 61.6 \n", + " 13 Maranhão 50.5 63.3 \n", + " 14 Piauí 54.4 63.7 \n", + " 15 Ceará 28.4 50.8 \n", + " 16 Rio Grande do Norte 43.2 69.6 \n", + " 17 Paraíba 59.1 71.3 \n", + " 18 Pernambuco 62.6 73.2 \n", + " 19 Alagoas 55.1 69.8 \n", + " 20 Sergipe 62.3 78.4 \n", + " 21 Bahia 59.6 70.4 \n", + " 22 Minas Gerais 36.8 49.4 \n", + " 23 Espírito Santo 37.9 58.2 \n", + " 24 Rio de Janeiro 68.0 67.8 \n", + " 25 São Paulo 50.8 41.2 \n", + " 26 Paraná 66.0 58.0 \n", + " 27 Santa Catarina 23.0 44.4 \n", + " 28 Rio Grande do Sul 53.4 66.9 \n", + " 29 Mato Grosso do Sul 71.5 68.3 \n", + " 30 Mato Grosso 29.3 33.3 \n", + " 31 Goiás 37.3 54.6 \n", + " 32 Distrito Federal 56.8 56.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.1 \n", + " 1 59.9 \n", + " 2 61.8 \n", + " 3 46.7 \n", + " 4 54.9 \n", + " 5 55.5 \n", + " 6 52.2 \n", + " 7 48.1 \n", + " 8 64.1 \n", + " 9 51.2 \n", + " 10 69.8 \n", + " 11 54.6 \n", + " 12 59.5 \n", + " 13 56.2 \n", + " 14 59.9 \n", + " 15 60.8 \n", + " 16 67.9 \n", + " 17 66.9 \n", + " 18 65.2 \n", + " 19 52.6 \n", + " 20 75.2 \n", + " 21 65.7 \n", + " 22 57.4 \n", + " 23 51.5 \n", + " 24 58.6 \n", + " 25 39.6 \n", + " 26 51.2 \n", + " 27 50.5 \n", + " 28 62.0 \n", + " 29 65.6 \n", + " 30 46.4 \n", + " 31 60.5 \n", + " 32 50.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2018': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 48.4 55.0 \n", + " 1 Norte 49.9 62.6 \n", + " 2 Nordeste 48.9 63.4 \n", + " 3 Sudeste 46.6 46.8 \n", + " 4 Sul 52.5 57.0 \n", + " 5 Centro-Oeste 44.1 49.8 \n", + " 6 Rondônia 36.6 58.4 \n", + " 7 Acre 41.4 51.5 \n", + " 8 Amazonas 51.6 60.7 \n", + " 9 Roraima 25.2 45.0 \n", + " 10 Pará 57.7 70.7 \n", + " 11 Amapá 51.3 60.2 \n", + " 12 Tocantins 38.7 57.7 \n", + " 13 Maranhão 47.7 61.2 \n", + " 14 Piauí 50.9 62.2 \n", + " 15 Ceará 24.6 46.0 \n", + " 16 Rio Grande do Norte 41.0 67.5 \n", + " 17 Paraíba 57.0 70.7 \n", + " 18 Pernambuco 57.0 70.8 \n", + " 19 Alagoas 50.1 68.2 \n", + " 20 Sergipe 60.8 76.4 \n", + " 21 Bahia 56.8 69.9 \n", + " 22 Minas Gerais 34.0 48.1 \n", + " 23 Espírito Santo 34.5 55.4 \n", + " 24 Rio de Janeiro 64.7 66.6 \n", + " 25 São Paulo 48.3 38.8 \n", + " 26 Paraná 63.0 55.2 \n", + " 27 Santa Catarina 21.7 41.4 \n", + " 28 Rio Grande do Sul 51.9 65.4 \n", + " 29 Mato Grosso do Sul 67.4 67.6 \n", + " 30 Mato Grosso 27.9 34.6 \n", + " 31 Goiás 32.9 50.0 \n", + " 32 Distrito Federal 54.3 53.8 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 53.2 \n", + " 1 61.0 \n", + " 2 61.2 \n", + " 3 47.0 \n", + " 4 54.1 \n", + " 5 53.2 \n", + " 6 51.2 \n", + " 7 48.4 \n", + " 8 61.4 \n", + " 9 52.5 \n", + " 10 70.6 \n", + " 11 56.1 \n", + " 12 58.8 \n", + " 13 56.3 \n", + " 14 55.7 \n", + " 
15 57.4 \n", + " 16 67.7 \n", + " 17 66.1 \n", + " 18 63.2 \n", + " 19 60.0 \n", + " 20 75.8 \n", + " 21 63.9 \n", + " 22 55.4 \n", + " 23 52.4 \n", + " 24 60.3 \n", + " 25 38.8 \n", + " 26 49.9 \n", + " 27 47.4 \n", + " 28 63.0 \n", + " 29 62.6 \n", + " 30 43.8 \n", + " 31 58.2 \n", + " 32 47.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2019': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 44.9 53.3 \n", + " 1 Norte 46.8 60.9 \n", + " 2 Nordeste 45.1 61.3 \n", + " 3 Sudeste 42.6 45.2 \n", + " 4 Sul 50.1 55.0 \n", + " 5 Centro-Oeste 41.1 48.3 \n", + " 6 Rondônia 30.9 52.9 \n", + " 7 Acre 36.9 49.6 \n", + " 8 Amazonas 49.0 59.5 \n", + " 9 Roraima 24.8 43.4 \n", + " 10 Pará 55.1 69.7 \n", + " 11 Amapá 46.4 59.8 \n", + " 12 Tocantins 35.5 55.8 \n", + " 13 Maranhão 44.7 59.9 \n", + " 14 Piauí 45.6 62.2 \n", + " 15 Ceará 21.6 40.5 \n", + " 16 Rio Grande do Norte 37.6 64.8 \n", + " 17 Paraíba 53.8 70.6 \n", + " 18 Pernambuco 51.2 68.4 \n", + " 19 Alagoas 45.0 65.0 \n", + " 20 Sergipe 57.4 75.1 \n", + " 21 Bahia 53.5 69.5 \n", + " 22 Minas Gerais 30.7 45.6 \n", + " 23 Espírito Santo 30.8 53.4 \n", + " 24 Rio de Janeiro 60.4 65.9 \n", + " 25 São Paulo 43.9 37.5 \n", + " 26 Paraná 61.6 51.5 \n", + " 27 Santa Catarina 20.0 38.7 \n", + " 28 Rio Grande do Sul 49.1 64.2 \n", + " 29 Mato Grosso do Sul 65.0 66.2 \n", + " 30 Mato Grosso 27.5 33.9 \n", + " 31 Goiás 30.2 47.7 \n", + " 32 Distrito Federal 44.8 50.5 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 51.1 \n", + " 1 59.3 \n", + " 2 59.4 \n", + " 3 44.7 \n", + " 4 50.9 \n", + " 5 51.4 \n", + " 6 48.3 \n", + " 7 46.7 \n", + " 8 62.3 \n", + " 9 47.7 \n", + " 10 68.5 \n", + " 11 51.7 \n", + " 12 56.8 \n", + " 13 54.8 \n", + " 14 55.8 \n", + " 15 52.0 \n", + " 16 66.1 \n", + " 17 64.5 \n", + " 18 65.5 \n", + " 19 59.3 \n", + " 20 74.6 \n", + " 21 63.1 \n", + " 22 50.7 \n", + " 23 50.2 \n", + " 24 59.5 \n", + " 25 36.9 \n", + " 26 45.2 \n", + " 27 43.0 \n", + " 28 59.4 \n", + " 29 59.2 \n", + " 30 43.9 \n", + " 31 56.0 \n", + " 32 45.1 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2020': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + 
" 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... \n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 41.7 51.6 \n", + " 1 Norte 44.0 58.8 \n", + " 2 Nordeste 40.9 59.3 \n", + " 3 Sudeste 39.3 43.8 \n", + " 4 Sul 48.2 53.7 \n", + " 5 Centro-Oeste 38.1 45.4 \n", + " 6 Rondônia 29.2 47.8 \n", + " 7 Acre 34.9 47.4 \n", + " 8 Amazonas 45.9 57.8 \n", + " 9 Roraima 25.2 37.0 \n", + " 10 Pará 52.0 68.6 \n", + " 11 Amapá 41.1 59.7 \n", + " 12 Tocantins 32.2 52.9 \n", + " 13 Maranhão 41.2 58.6 \n", + " 14 Piauí 39.8 58.2 \n", + " 15 Ceará 18.4 37.1 \n", + " 16 Rio Grande do Norte 33.4 62.5 \n", + " 17 Paraíba 50.3 68.4 \n", + " 18 Pernambuco 43.5 65.2 \n", + " 19 Alagoas 39.7 61.2 \n", + " 20 Sergipe 53.9 74.8 \n", + " 21 Bahia 50.6 68.8 \n", + " 22 Minas Gerais 28.2 44.7 \n", + " 23 Espírito Santo 27.9 51.2 \n", + " 24 Rio de Janeiro 55.4 64.5 \n", + " 25 São Paulo 40.7 35.4 \n", + " 26 Paraná 61.5 49.7 \n", + " 27 Santa Catarina 18.7 37.4 \n", + " 28 Rio Grande do Sul 46.0 63.8 \n", + " 29 Mato Grosso do Sul 61.7 65.3 \n", + " 30 Mato Grosso 24.5 35.0 \n", + " 31 Goiás 29.2 42.5 \n", + " 32 Distrito Federal 39.9 49.1 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 50.3 \n", + " 1 58.5 \n", + " 2 58.5 \n", + " 3 44.2 \n", + " 4 48.8 \n", + " 5 49.4 \n", + " 6 49.1 \n", + " 7 46.7 \n", + " 8 58.6 \n", + " 9 48.3 \n", + " 10 67.6 \n", + " 11 52.2 \n", + " 12 54.8 \n", + " 13 52.4 \n", + " 14 55.8 \n", + " 
15 49.2 \n", + " 16 62.7 \n", + " 17 62.1 \n", + " 18 63.7 \n", + " 19 58.8 \n", + " 20 71.3 \n", + " 21 64.9 \n", + " 22 49.6 \n", + " 23 49.4 \n", + " 24 60.4 \n", + " 25 35.7 \n", + " 26 44.3 \n", + " 27 40.0 \n", + " 28 58.0 \n", + " 29 59.3 \n", + " 30 42.7 \n", + " 31 51.9 \n", + " 32 44.3 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2021': Unnamed: 0 Unnamed: 1 \\\n", + " 0 NaN 0 \n", + " 1 NaN 1 \n", + " 2 NaN 2 \n", + " 3 NaN 3 \n", + " 4 NaN 4 \n", + " 5 NaN 5 \n", + " 6 NaN 11 \n", + " 7 NaN 12 \n", + " 8 NaN 13 \n", + " 9 NaN 14 \n", + " 10 NaN 15 \n", + " 11 NaN 16 \n", + " 12 NaN 17 \n", + " 13 NaN 21 \n", + " 14 NaN 22 \n", + " 15 NaN 23 \n", + " 16 NaN 24 \n", + " 17 NaN 25 \n", + " 18 NaN 26 \n", + " 19 NaN 27 \n", + " 20 NaN 28 \n", + " 21 NaN 29 \n", + " 22 NaN 31 \n", + " 23 NaN 32 \n", + " 24 NaN 33 \n", + " 25 NaN 35 \n", + " 26 NaN 41 \n", + " 27 NaN 42 \n", + " 28 NaN 43 \n", + " 29 NaN 50 \n", + " 30 NaN 51 \n", + " 31 NaN 52 \n", + " 32 NaN 53 \n", + " 33 NaN NaN \n", + " 34 NaN Notas: Foram consideradas a rede total (escola... 
\n", + " \n", + " Unnamed: 2 Distorção Idade-Série Distorção Idade-Série.1 \\\n", + " 0 Brasil 36.6 49.5 \n", + " 1 Norte 36.8 57.7 \n", + " 2 Nordeste 34.2 56.9 \n", + " 3 Sudeste 36.2 41.1 \n", + " 4 Sul 43.1 51.1 \n", + " 5 Centro-Oeste 33.6 43.3 \n", + " 6 Rondônia 23.4 43.6 \n", + " 7 Acre 27.9 46.2 \n", + " 8 Amazonas 39.8 56.7 \n", + " 9 Roraima 20.5 35.8 \n", + " 10 Pará 43.4 68.1 \n", + " 11 Amapá 34.0 58.8 \n", + " 12 Tocantins 27.0 48.7 \n", + " 13 Maranhão 35.3 56.5 \n", + " 14 Piauí 33.2 56.3 \n", + " 15 Ceará 14.6 33.3 \n", + " 16 Rio Grande do Norte 23.6 60.1 \n", + " 17 Paraíba 42.4 65.7 \n", + " 18 Pernambuco 36.4 60.8 \n", + " 19 Alagoas 32.2 57.4 \n", + " 20 Sergipe 44.5 72.2 \n", + " 21 Bahia 42.9 66.6 \n", + " 22 Minas Gerais 25.0 40.4 \n", + " 23 Espírito Santo 20.4 47.6 \n", + " 24 Rio de Janeiro 49.3 63.4 \n", + " 25 São Paulo 38.8 32.6 \n", + " 26 Paraná 60.2 46.1 \n", + " 27 Santa Catarina 14.6 35.1 \n", + " 28 Rio Grande do Sul 37.6 61.6 \n", + " 29 Mato Grosso do Sul 56.0 63.4 \n", + " 30 Mato Grosso 21.1 32.3 \n", + " 31 Goiás 25.2 39.8 \n", + " 32 Distrito Federal 36.0 48.9 \n", + " 33 NaN NaN NaN \n", + " 34 NaN NaN NaN \n", + " \n", + " Distorção Idade-Série.2 \n", + " 0 48.3 \n", + " 1 57.5 \n", + " 2 57.8 \n", + " 3 41.8 \n", + " 4 44.9 \n", + " 5 46.1 \n", + " 6 48.7 \n", + " 7 44.4 \n", + " 8 55.9 \n", + " 9 43.7 \n", + " 10 66.7 \n", + " 11 54.5 \n", + " 12 52.9 \n", + " 13 50.8 \n", + " 14 55.3 \n", + " 15 44.4 \n", + " 16 59.9 \n", + " 17 61.8 \n", + " 18 61.3 \n", + " 19 58.9 \n", + " 20 68.7 \n", + " 21 68.6 \n", + " 22 45.1 \n", + " 23 47.3 \n", + " 24 57.8 \n", + " 25 34.9 \n", + " 26 40.6 \n", + " 27 37.1 \n", + " 28 53.2 \n", + " 29 57.9 \n", + " 30 40.7 \n", + " 31 46.5 \n", + " 32 43.6 \n", + " 33 NaN \n", + " 34 NaN }" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: 2007\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2008\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2009\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2010\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2011\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2012\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2013\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2014\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2015\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2016\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " 
dtype='object')\n", + "\n", + "Sheet: 2017\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2018\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2019\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2020\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2021\n", + "Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Distorção Idade-Série',\n", + " 'Distorção Idade-Série.1', 'Distorção Idade-Série.2'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = {}\n", + "\n", + "for table_name, columns in dfs.items():\n", + " df = pd.DataFrame(columns) # Create DataFrame for each table\n", + " dataframes[table_name] = df # Store the DataFrame in a dictionary\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'Unnamed: 1':'id_uf',\n", + " 'Unnamed: 2':'nome',\n", + " 'Distorção Idade-Série': 'Ensino Fundamental – Anos Iniciais',\n", + " 'Distorção Idade-Série.1':'Ensino Fundamental – Anos Finais',\n", + " 'Distorção Idade-Série.2':'Ensino Médio Regular' \n", + "}" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"Unnamed\")\n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'2007': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 72.2 60.6 \n", + " 1 76.1 69.7 \n", + " 2 74.4 64.7 \n", + " 3 69.3 56.5 \n", + " 4 73.3 59.6 \n", + " 5 78.7 73.6 \n", + " 6 72.8 65.8 \n", + " 7 67.2 60.0 \n", + " 8 76.3 75.7 \n", + " 9 71.0 69.5 \n", + " 10 78.2 68.4 \n", + " 11 76.4 81.0 \n", + " 12 75.4 70.7 \n", + " 13 74.2 61.2 \n", + " 14 83.0 69.3 \n", + " 15 67.8 56.9 \n", + " 16 66.4 74.0 \n", + " 17 73.3 71.6 \n", + " 18 78.6 64.5 \n", + " 19 72.0 79.2 \n", + " 20 85.0 85.4 \n", + " 21 76.7 66.5 \n", + " 22 79.0 69.1 \n", + " 23 67.5 69.0 \n", + " 24 85.1 77.2 \n", + " 25 60.7 50.2 \n", + " 26 72.3 54.2 \n", + " 27 75.8 65.3 \n", + " 28 73.6 66.1 \n", + " 29 86.0 81.2 \n", + " 30 76.3 66.2 \n", + " 31 76.8 75.7 \n", + " 32 74.6 70.6 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 65.3 \n", + " 1 75.7 \n", + " 2 77.9 \n", + " 3 58.4 \n", + " 4 62.6 \n", + " 5 73.6 \n", + " 6 76.2 \n", + " 7 60.5 \n", + " 8 75.5 \n", + " 9 76.9 \n", + " 10 77.7 \n", + " 11 69.4 \n", + " 12 75.9 \n", + " 13 76.2 \n", + " 14 80.5 \n", + " 15 83.7 \n", + " 16 78.0 \n", + " 17 88.0 \n", + " 18 81.3 \n", + " 19 88.3 \n", + " 20 85.7 \n", + " 21 67.1 \n", + " 22 69.1 \n", + " 23 69.1 \n", + " 24 86.7 \n", + " 25 51.5 \n", + " 26 59.5 \n", + " 27 71.9 \n", + " 28 60.4 \n", + " 29 75.4 \n", + " 30 78.1 \n", + " 31 75.5 \n", + " 32 69.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2008': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 66.5 54.5 \n", + " 1 61.2 44.8 \n", + " 2 60.7 45.7 \n", + " 3 65.7 53.5 \n", + " 4 73.5 63.2 \n", + " 5 72.3 63.6 \n", + " 6 55.1 35.9 \n", + " 7 43.8 29.7 \n", + " 8 58.0 57.0 \n", + " 9 44.2 28.4 \n", + " 10 65.1 48.3 \n", + " 11 57.0 54.4 \n", + " 12 67.1 46.3 \n", + " 13 62.8 44.3 \n", + " 14 66.3 57.9 \n", + " 15 55.9 38.6 \n", + " 16 47.1 45.2 \n", + " 17 54.0 48.1 \n", + " 18 73.5 58.6 \n", + " 19 49.0 46.1 \n", + " 20 68.5 58.9 \n", + " 21 60.5 44.4 \n", + " 22 73.1 61.8 \n", + " 23 62.9 61.2 \n", + " 24 82.7 72.7 \n", + " 25 57.5 48.2 \n", + " 26 78.0 60.6 \n", + " 27 69.5 64.5 \n", + " 28 70.1 65.0 \n", + " 29 84.6 75.7 \n", + " 30 72.6 66.2 \n", + " 31 68.0 62.1 \n", + " 32 62.0 60.0 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 57.3 \n", + " 1 50.7 \n", + " 2 49.4 \n", + " 3 56.6 \n", + " 4 65.0 \n", + " 5 60.5 \n", + " 6 48.8 \n", + " 7 47.2 \n", + " 8 62.2 \n", + " 9 41.2 \n", + " 10 48.7 \n", + " 11 60.0 \n", + " 12 48.2 \n", + " 13 45.3 \n", + " 14 51.0 \n", + " 15 48.7 \n", + " 16 44.2 \n", + " 17 52.3 \n", + " 18 65.1 \n", + " 19 61.1 \n", + " 20 65.7 \n", + " 21 41.9 \n", + " 22 58.6 \n", + " 23 73.5 \n", + " 24 80.2 \n", + " 25 51.5 \n", + " 26 59.6 \n", + " 27 72.5 \n", + " 28 64.0 \n", + " 29 80.0 \n", + " 30 72.7 \n", + " 31 55.8 \n", + " 32 60.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2009': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 68.4 54.8 \n", + " 1 69.8 51.0 \n", + " 2 66.9 50.7 \n", + " 3 65.6 52.1 \n", + " 4 75.8 63.8 \n", + " 5 73.1 65.6 \n", + " 6 64.2 47.3 \n", + " 7 51.6 37.8 \n", + " 8 70.6 56.7 \n", + " 9 50.6 51.7 \n", + " 10 74.2 54.0 \n", + " 11 64.5 49.2 \n", + " 12 72.8 50.5 \n", + " 13 64.4 48.8 \n", + " 14 70.6 47.2 \n", + " 15 64.3 44.5 \n", + " 16 53.4 57.9 \n", + " 17 70.4 47.9 \n", + " 18 78.3 64.5 \n", + " 19 54.1 58.9 \n", + " 20 78.2 61.1 \n", + " 21 65.5 50.2 \n", + " 22 75.4 64.4 \n", + " 23 69.9 60.2 \n", + " 24 84.6 74.3 \n", + " 25 57.0 47.3 \n", + " 26 80.6 61.1 \n", + " 27 57.4 61.8 \n", + " 28 75.0 67.9 \n", + " 29 84.1 78.9 \n", + " 30 73.2 69.7 \n", + " 31 65.7 61.2 \n", + " 32 66.4 64.6 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.0 \n", + " 1 57.5 \n", + " 2 56.6 \n", + " 3 52.1 \n", + " 4 62.1 \n", + " 5 62.2 \n", + " 6 45.1 \n", + " 7 43.4 \n", + " 8 61.4 \n", + " 9 21.4 \n", + " 10 62.1 \n", + " 11 54.7 \n", + " 12 55.2 \n", + " 13 48.2 \n", + " 14 61.4 \n", + " 15 52.6 \n", + " 16 51.1 \n", + " 17 50.2 \n", + " 18 69.3 \n", + " 19 
64.1 \n", + " 20 68.9 \n", + " 21 54.4 \n", + " 22 59.9 \n", + " 23 69.9 \n", + " 24 74.8 \n", + " 25 46.0 \n", + " 26 57.4 \n", + " 27 57.4 \n", + " 28 70.3 \n", + " 29 80.6 \n", + " 30 70.9 \n", + " 31 56.2 \n", + " 32 67.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2010': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 67.5 57.2 \n", + " 1 70.0 59.4 \n", + " 2 67.1 57.8 \n", + " 3 65.0 52.8 \n", + " 4 72.4 62.8 \n", + " 5 68.6 67.2 \n", + " 6 67.5 60.7 \n", + " 7 57.2 49.4 \n", + " 8 70.7 64.2 \n", + " 9 50.6 59.2 \n", + " 10 74.9 62.5 \n", + " 11 66.7 54.7 \n", + " 12 68.6 57.5 \n", + " 13 65.2 55.5 \n", + " 14 69.2 56.1 \n", + " 15 64.2 50.7 \n", + " 16 56.2 66.8 \n", + " 17 71.7 56.5 \n", + " 18 74.9 65.6 \n", + " 19 57.6 60.2 \n", + " 20 78.2 72.2 \n", + " 21 67.4 59.3 \n", + " 22 69.3 66.1 \n", + " 23 59.1 59.0 \n", + " 24 82.2 73.3 \n", + " 25 58.4 47.4 \n", + " 26 76.0 59.0 \n", + " 27 54.1 62.1 \n", + " 28 73.0 67.8 \n", + " 29 81.2 78.7 \n", + " 30 64.6 66.7 \n", + " 31 62.2 66.2 \n", + " 32 62.7 61.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.4 \n", + " 1 63.0 \n", + " 2 60.3 \n", + " 3 52.1 \n", + " 4 55.6 \n", + " 5 64.6 \n", + " 6 45.9 \n", + " 7 56.6 \n", + " 8 70.9 \n", + " 9 58.6 \n", + " 10 70.8 \n", + " 11 54.1 \n", + " 12 56.0 \n", + " 13 56.7 \n", + " 14 65.9 \n", + " 15 50.6 \n", + " 16 67.4 \n", + " 17 55.6 \n", + " 18 69.9 \n", + " 19 69.1 \n", + " 20 80.1 \n", + " 21 57.6 \n", + " 22 58.9 \n", + " 23 60.1 \n", + " 24 67.7 \n", + " 25 47.3 \n", + " 26 51.1 \n", + " 27 53.0 \n", + " 28 62.9 \n", + " 29 75.6 \n", + " 30 69.6 \n", + " 31 63.2 \n", + " 32 61.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2011': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 66.5 58.5 \n", + " 1 69.1 63.2 \n", + " 2 66.9 61.9 \n", + " 3 64.7 53.6 \n", + " 4 69.1 60.6 \n", + " 5 66.1 66.2 \n", + " 6 67.5 61.4 \n", + " 7 57.1 53.4 \n", + " 8 69.7 69.5 \n", + " 9 49.4 65.6 \n", + " 10 73.4 66.5 \n", + " 11 69.8 55.4 \n", + " 12 67.2 62.8 \n", + " 13 64.8 60.2 \n", + " 14 67.1 61.0 \n", + " 15 62.1 56.8 \n", + " 16 57.6 68.9 \n", + " 17 71.2 60.8 \n", + " 18 74.3 67.4 \n", + " 19 60.4 64.5 \n", + " 20 78.4 73.3 \n", + " 21 67.7 61.6 \n", + " 22 65.1 65.3 \n", + " 23 54.4 58.0 \n", + " 24 79.6 72.8 \n", + " 25 60.4 47.5 \n", + " 26 73.5 56.1 \n", + " 27 50.3 60.1 \n", + " 28 69.8 66.4 \n", + " 29 78.4 79.3 \n", + " 30 58.8 62.5 \n", + " 31 60.5 65.1 \n", + " 32 63.0 61.0 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 56.1 \n", + " 1 62.9 \n", + " 2 59.9 \n", + " 3 52.5 \n", + " 4 54.3 \n", + " 5 62.5 \n", + " 6 51.0 \n", + " 7 51.2 \n", + " 8 68.9 \n", + " 9 48.1 \n", + " 10 69.1 \n", + " 11 57.7 \n", + " 12 62.9 \n", + " 13 54.6 \n", + " 14 63.0 \n", + " 15 57.7 \n", + " 16 68.8 \n", + " 17 50.5 \n", + " 18 65.9 \n", + " 19 62.8 \n", + " 20 73.5 \n", + " 21 57.1 \n", + " 22 59.8 \n", + " 23 59.1 \n", + " 24 66.3 \n", + " 25 47.4 \n", + " 26 50.7 \n", + " 27 51.3 \n", + " 28 61.2 \n", + " 29 68.9 \n", + " 30 64.6 \n", + " 31 63.8 \n", + " 32 57.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2012': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 64.4 58.9 \n", + " 1 67.0 64.7 \n", + " 2 66.2 63.7 \n", + " 3 62.0 53.1 \n", + " 4 67.1 61.4 \n", + " 5 62.5 64.3 \n", + " 6 67.0 60.8 \n", + " 7 54.5 58.5 \n", + " 8 66.0 72.1 \n", + " 9 43.5 62.5 \n", + " 10 70.2 67.7 \n", + " 11 68.3 61.9 \n", + " 12 68.3 62.9 \n", + " 13 62.6 60.3 \n", + " 14 68.5 61.3 \n", + " 15 57.8 59.4 \n", + " 16 57.6 69.9 \n", + " 17 69.3 61.9 \n", + " 18 74.2 69.6 \n", + " 19 61.7 66.2 \n", + " 20 79.5 75.4 \n", + " 21 68.0 64.0 \n", + " 22 57.8 63.3 \n", + " 23 50.4 59.4 \n", + " 24 78.8 71.8 \n", + " 25 59.6 46.6 \n", + " 26 73.8 56.9 \n", + " 27 41.5 59.9 \n", + " 28 67.0 67.6 \n", + " 29 77.2 78.6 \n", + " 30 47.1 57.6 \n", + " 31 56.6 63.0 \n", + " 32 62.6 59.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 55.3 \n", + " 1 62.9 \n", + " 2 60.9 \n", + " 3 51.4 \n", + " 4 51.8 \n", + " 5 61.5 \n", + " 6 53.9 \n", + " 7 52.0 \n", + " 8 66.0 \n", + " 9 57.5 \n", + " 10 70.9 \n", + " 11 52.5 \n", + " 12 61.0 \n", + " 13 57.8 \n", + " 14 58.3 \n", + " 15 59.0 \n", + " 16 67.7 \n", + " 17 52.1 \n", + " 18 67.1 \n", + " 19 
63.5 \n", + " 20 73.7 \n", + " 21 59.8 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 60.5 \n", + " 25 46.6 \n", + " 26 46.9 \n", + " 27 49.9 \n", + " 28 59.4 \n", + " 29 69.8 \n", + " 30 57.5 \n", + " 31 64.7 \n", + " 32 53.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2013': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 61.8 59.3 \n", + " 1 65.1 65.2 \n", + " 2 63.8 66.2 \n", + " 3 59.5 52.2 \n", + " 4 63.0 63.8 \n", + " 5 60.1 61.6 \n", + " 6 66.3 62.2 \n", + " 7 55.8 56.4 \n", + " 8 63.2 66.0 \n", + " 9 38.4 58.2 \n", + " 10 69.1 69.9 \n", + " 11 64.4 60.6 \n", + " 12 63.3 66.0 \n", + " 13 60.5 61.9 \n", + " 14 66.0 66.3 \n", + " 15 50.2 63.1 \n", + " 16 56.4 72.8 \n", + " 17 68.0 67.2 \n", + " 18 73.1 71.1 \n", + " 19 61.3 65.7 \n", + " 20 76.4 77.6 \n", + " 21 66.8 65.8 \n", + " 22 52.4 60.5 \n", + " 23 47.8 61.8 \n", + " 24 77.1 74.3 \n", + " 25 58.4 44.8 \n", + " 26 70.7 59.6 \n", + " 27 35.1 59.7 \n", + " 28 62.8 70.3 \n", + " 29 76.4 75.2 \n", + " 30 41.9 47.7 \n", + " 31 53.5 64.7 \n", + " 32 63.5 58.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 55.4 \n", + " 1 62.0 \n", + " 2 61.9 \n", + " 3 51.0 \n", + " 4 52.9 \n", + " 5 61.4 \n", + " 6 54.1 \n", + " 7 48.6 \n", + " 8 63.9 \n", + " 9 60.3 \n", + " 10 70.6 \n", + " 11 54.4 \n", + " 12 60.4 \n", + " 13 57.8 \n", + " 14 63.2 \n", + " 15 60.9 \n", + " 16 65.5 \n", + " 17 56.9 \n", + " 18 71.0 \n", + " 19 55.5 \n", + " 20 72.9 \n", + " 21 60.0 \n", + " 22 61.1 \n", + " 23 51.7 \n", + " 24 63.1 \n", + " 25 46.1 \n", + " 26 48.2 \n", + " 27 51.8 \n", + " 28 59.4 \n", + " 29 67.8 \n", + " 30 60.5 \n", + " 31 65.1 \n", + " 32 53.9 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2014': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 58.7 59.6 \n", + " 1 60.7 65.6 \n", + " 2 60.4 68.1 \n", + " 3 56.4 51.9 \n", + " 4 61.2 64.5 \n", + " 5 56.0 58.6 \n", + " 6 57.7 63.7 \n", + " 7 53.6 55.5 \n", + " 8 61.6 67.2 \n", + " 9 35.5 54.8 \n", + " 10 64.8 72.0 \n", + " 11 60.4 60.3 \n", + " 12 56.9 65.2 \n", + " 13 59.0 63.6 \n", + " 14 62.1 68.2 \n", + " 15 42.5 63.9 \n", + " 16 52.5 74.8 \n", + " 17 65.0 69.4 \n", + " 18 71.2 72.7 \n", + " 19 60.1 69.4 \n", + " 20 71.9 81.3 \n", + " 21 64.4 67.9 \n", + " 22 46.0 56.6 \n", + " 23 45.6 63.0 \n", + " 24 75.3 73.7 \n", + " 25 56.1 44.9 \n", + " 26 73.2 61.2 \n", + " 27 30.0 57.5 \n", + " 28 58.9 70.9 \n", + " 29 75.1 73.4 \n", + " 30 35.0 37.8 \n", + " 31 46.7 62.0 \n", + " 32 62.9 61.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 54.5 \n", + " 1 58.9 \n", + " 2 64.7 \n", + " 3 49.3 \n", + " 4 52.4 \n", + " 5 61.0 \n", + " 6 52.9 \n", + " 7 49.4 \n", + " 8 67.0 \n", + " 9 64.4 \n", + " 10 63.8 \n", + " 11 57.8 \n", + " 12 60.8 \n", + " 13 61.1 \n", + " 14 63.6 \n", + " 15 62.2 \n", + " 16 69.0 \n", + " 17 60.3 \n", + " 18 68.6 \n", + " 19 61.3 \n", + " 20 74.1 \n", + " 21 67.8 \n", + " 22 60.5 \n", + " 23 53.9 \n", + " 24 63.1 \n", + " 25 43.5 \n", + " 26 48.4 \n", + " 27 50.4 \n", + " 28 59.0 \n", + " 29 66.4 \n", + " 30 58.7 \n", + " 31 64.4 \n", + " 32 54.2 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2015': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 55.8 59.2 \n", + " 1 57.8 66.0 \n", + " 2 57.2 67.6 \n", + " 3 53.8 51.1 \n", + " 4 57.6 63.6 \n", + " 5 53.3 56.2 \n", + " 6 53.0 62.4 \n", + " 7 50.5 54.1 \n", + " 8 59.2 65.1 \n", + " 9 32.1 54.7 \n", + " 10 62.8 73.7 \n", + " 11 55.7 61.7 \n", + " 12 53.1 64.9 \n", + " 13 55.3 63.7 \n", + " 14 60.1 67.5 \n", + " 15 36.3 60.4 \n", + " 16 49.0 74.2 \n", + " 17 62.9 70.4 \n", + " 18 69.3 73.1 \n", + " 19 58.1 69.9 \n", + " 20 68.2 81.0 \n", + " 21 62.7 68.6 \n", + " 22 41.7 53.1 \n", + " 23 43.0 62.6 \n", + " 24 72.9 72.3 \n", + " 25 54.5 44.4 \n", + " 26 69.7 61.2 \n", + " 27 26.5 54.1 \n", + " 28 56.0 70.5 \n", + " 29 73.1 69.4 \n", + " 30 30.8 34.0 \n", + " 31 42.7 60.6 \n", + " 32 62.1 59.7 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.7 \n", + " 1 58.4 \n", + " 2 64.4 \n", + " 3 48.3 \n", + " 4 52.0 \n", + " 5 58.3 \n", + " 6 53.3 \n", + " 7 49.6 \n", + " 8 68.1 \n", + " 9 54.5 \n", + " 10 65.6 \n", + " 11 55.5 \n", + " 12 58.9 \n", + " 13 61.0 \n", + " 14 65.6 \n", + " 15 62.6 \n", + " 16 69.1 \n", + " 17 60.4 \n", + " 18 66.0 \n", + " 19 
57.8 \n", + " 20 73.7 \n", + " 21 67.6 \n", + " 22 60.9 \n", + " 23 54.2 \n", + " 24 62.0 \n", + " 25 41.7 \n", + " 26 48.0 \n", + " 27 50.1 \n", + " 28 58.0 \n", + " 29 67.2 \n", + " 30 52.3 \n", + " 31 61.6 \n", + " 32 52.7 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2016': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 53.5 58.2 \n", + " 1 55.7 65.6 \n", + " 2 54.3 66.9 \n", + " 3 51.4 49.7 \n", + " 4 56.9 61.2 \n", + " 5 51.0 55.3 \n", + " 6 47.2 62.7 \n", + " 7 46.8 55.5 \n", + " 8 57.7 63.3 \n", + " 9 28.9 52.9 \n", + " 10 61.3 73.2 \n", + " 11 54.4 62.0 \n", + " 12 52.5 62.7 \n", + " 13 53.1 64.2 \n", + " 14 57.3 67.5 \n", + " 15 30.9 55.6 \n", + " 16 45.4 72.9 \n", + " 17 60.3 71.4 \n", + " 18 65.8 74.0 \n", + " 19 56.1 70.7 \n", + " 20 66.1 79.4 \n", + " 21 61.4 69.9 \n", + " 22 39.3 50.9 \n", + " 23 40.7 59.8 \n", + " 24 70.1 70.2 \n", + " 25 51.9 43.3 \n", + " 26 69.5 59.3 \n", + " 27 24.5 47.9 \n", + " 28 54.9 68.4 \n", + " 29 71.9 67.3 \n", + " 30 30.0 31.1 \n", + " 31 40.0 60.1 \n", + " 32 59.7 59.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.9 \n", + " 1 60.4 \n", + " 2 64.6 \n", + " 3 47.8 \n", + " 4 53.6 \n", + " 5 57.3 \n", + " 6 54.4 \n", + " 7 51.1 \n", + " 8 67.3 \n", + " 9 51.1 \n", + " 10 69.7 \n", + " 11 55.5 \n", + " 12 60.9 \n", + " 13 58.7 \n", + " 14 63.5 \n", + " 15 64.1 \n", + " 16 68.3 \n", + " 17 65.5 \n", + " 18 65.4 \n", + " 19 64.6 \n", + " 20 72.0 \n", + " 21 68.0 \n", + " 22 59.8 \n", + " 23 56.2 \n", + " 24 58.3 \n", + " 25 41.2 \n", + " 26 49.1 \n", + " 27 50.7 \n", + " 28 61.0 \n", + " 29 68.0 \n", + " 30 49.8 \n", + " 31 61.6 \n", + " 32 50.8 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2017': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 51.4 56.8 \n", + " 1 53.1 64.8 \n", + " 2 52.2 65.3 \n", + " 3 49.4 48.3 \n", + " 4 54.6 59.2 \n", + " 5 48.2 52.5 \n", + " 6 42.7 60.5 \n", + " 7 45.1 54.0 \n", + " 8 54.5 62.9 \n", + " 9 26.2 50.3 \n", + " 10 59.9 72.7 \n", + " 11 53.8 60.4 \n", + " 12 44.1 61.6 \n", + " 13 50.5 63.3 \n", + " 14 54.4 63.7 \n", + " 15 28.4 50.8 \n", + " 16 43.2 69.6 \n", + " 17 59.1 71.3 \n", + " 18 62.6 73.2 \n", + " 19 55.1 69.8 \n", + " 20 62.3 78.4 \n", + " 21 59.6 70.4 \n", + " 22 36.8 49.4 \n", + " 23 37.9 58.2 \n", + " 24 68.0 67.8 \n", + " 25 50.8 41.2 \n", + " 26 66.0 58.0 \n", + " 27 23.0 44.4 \n", + " 28 53.4 66.9 \n", + " 29 71.5 68.3 \n", + " 30 29.3 33.3 \n", + " 31 37.3 54.6 \n", + " 32 56.8 56.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.1 \n", + " 1 59.9 \n", + " 2 61.8 \n", + " 3 46.7 \n", + " 4 54.9 \n", + " 5 55.5 \n", + " 6 52.2 \n", + " 7 48.1 \n", + " 8 64.1 \n", + " 9 51.2 \n", + " 10 69.8 \n", + " 11 54.6 \n", + " 12 59.5 \n", + " 13 56.2 \n", + " 14 59.9 \n", + " 15 60.8 \n", + " 16 67.9 \n", + " 17 66.9 \n", + " 18 65.2 \n", + " 19 52.6 \n", + " 20 75.2 \n", + " 21 65.7 \n", + " 22 57.4 \n", + " 23 51.5 \n", + " 24 58.6 \n", + " 25 39.6 \n", + " 26 51.2 \n", + " 27 50.5 \n", + " 28 62.0 \n", + " 29 65.6 \n", + " 30 46.4 \n", + " 31 60.5 \n", + " 32 50.5 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2018': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 48.4 55.0 \n", + " 1 49.9 62.6 \n", + " 2 48.9 63.4 \n", + " 3 46.6 46.8 \n", + " 4 52.5 57.0 \n", + " 5 44.1 49.8 \n", + " 6 36.6 58.4 \n", + " 7 41.4 51.5 \n", + " 8 51.6 60.7 \n", + " 9 25.2 45.0 \n", + " 10 57.7 70.7 \n", + " 11 51.3 60.2 \n", + " 12 38.7 57.7 \n", + " 13 47.7 61.2 \n", + " 14 50.9 62.2 \n", + " 15 24.6 46.0 \n", + " 16 41.0 67.5 \n", + " 17 57.0 70.7 \n", + " 18 57.0 70.8 \n", + " 19 50.1 68.2 \n", + " 20 60.8 76.4 \n", + " 21 56.8 69.9 \n", + " 22 34.0 48.1 \n", + " 23 34.5 55.4 \n", + " 24 64.7 66.6 \n", + " 25 48.3 38.8 \n", + " 26 63.0 55.2 \n", + " 27 21.7 41.4 \n", + " 28 51.9 65.4 \n", + " 29 67.4 67.6 \n", + " 30 27.9 34.6 \n", + " 31 32.9 50.0 \n", + " 32 54.3 53.8 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 53.2 \n", + " 1 61.0 \n", + " 2 61.2 \n", + " 3 47.0 \n", + " 4 54.1 \n", + " 5 53.2 \n", + " 6 51.2 \n", + " 7 48.4 \n", + " 8 61.4 \n", + " 9 52.5 \n", + " 10 70.6 \n", + " 11 56.1 \n", + " 12 58.8 \n", + " 13 56.3 \n", + " 14 55.7 \n", + " 15 57.4 \n", + " 16 67.7 \n", + " 17 66.1 \n", + " 18 63.2 \n", + " 19 
60.0 \n", + " 20 75.8 \n", + " 21 63.9 \n", + " 22 55.4 \n", + " 23 52.4 \n", + " 24 60.3 \n", + " 25 38.8 \n", + " 26 49.9 \n", + " 27 47.4 \n", + " 28 63.0 \n", + " 29 62.6 \n", + " 30 43.8 \n", + " 31 58.2 \n", + " 32 47.6 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2019': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... 
NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 44.9 53.3 \n", + " 1 46.8 60.9 \n", + " 2 45.1 61.3 \n", + " 3 42.6 45.2 \n", + " 4 50.1 55.0 \n", + " 5 41.1 48.3 \n", + " 6 30.9 52.9 \n", + " 7 36.9 49.6 \n", + " 8 49.0 59.5 \n", + " 9 24.8 43.4 \n", + " 10 55.1 69.7 \n", + " 11 46.4 59.8 \n", + " 12 35.5 55.8 \n", + " 13 44.7 59.9 \n", + " 14 45.6 62.2 \n", + " 15 21.6 40.5 \n", + " 16 37.6 64.8 \n", + " 17 53.8 70.6 \n", + " 18 51.2 68.4 \n", + " 19 45.0 65.0 \n", + " 20 57.4 75.1 \n", + " 21 53.5 69.5 \n", + " 22 30.7 45.6 \n", + " 23 30.8 53.4 \n", + " 24 60.4 65.9 \n", + " 25 43.9 37.5 \n", + " 26 61.6 51.5 \n", + " 27 20.0 38.7 \n", + " 28 49.1 64.2 \n", + " 29 65.0 66.2 \n", + " 30 27.5 33.9 \n", + " 31 30.2 47.7 \n", + " 32 44.8 50.5 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 51.1 \n", + " 1 59.3 \n", + " 2 59.4 \n", + " 3 44.7 \n", + " 4 50.9 \n", + " 5 51.4 \n", + " 6 48.3 \n", + " 7 46.7 \n", + " 8 62.3 \n", + " 9 47.7 \n", + " 10 68.5 \n", + " 11 51.7 \n", + " 12 56.8 \n", + " 13 54.8 \n", + " 14 55.8 \n", + " 15 52.0 \n", + " 16 66.1 \n", + " 17 64.5 \n", + " 18 65.5 \n", + " 19 59.3 \n", + " 20 74.6 \n", + " 21 63.1 \n", + " 22 50.7 \n", + " 23 50.2 \n", + " 24 59.5 \n", + " 25 36.9 \n", + " 26 45.2 \n", + " 27 43.0 \n", + " 28 59.4 \n", + " 29 59.2 \n", + " 30 43.9 \n", + " 31 56.0 \n", + " 32 45.1 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2020': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", + " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia 
\n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 41.7 51.6 \n", + " 1 44.0 58.8 \n", + " 2 40.9 59.3 \n", + " 3 39.3 43.8 \n", + " 4 48.2 53.7 \n", + " 5 38.1 45.4 \n", + " 6 29.2 47.8 \n", + " 7 34.9 47.4 \n", + " 8 45.9 57.8 \n", + " 9 25.2 37.0 \n", + " 10 52.0 68.6 \n", + " 11 41.1 59.7 \n", + " 12 32.2 52.9 \n", + " 13 41.2 58.6 \n", + " 14 39.8 58.2 \n", + " 15 18.4 37.1 \n", + " 16 33.4 62.5 \n", + " 17 50.3 68.4 \n", + " 18 43.5 65.2 \n", + " 19 39.7 61.2 \n", + " 20 53.9 74.8 \n", + " 21 50.6 68.8 \n", + " 22 28.2 44.7 \n", + " 23 27.9 51.2 \n", + " 24 55.4 64.5 \n", + " 25 40.7 35.4 \n", + " 26 61.5 49.7 \n", + " 27 18.7 37.4 \n", + " 28 46.0 63.8 \n", + " 29 61.7 65.3 \n", + " 30 24.5 35.0 \n", + " 31 29.2 42.5 \n", + " 32 39.9 49.1 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 50.3 \n", + " 1 58.5 \n", + " 2 58.5 \n", + " 3 44.2 \n", + " 4 48.8 \n", + " 5 49.4 \n", + " 6 49.1 \n", + " 7 46.7 \n", + " 8 58.6 \n", + " 9 48.3 \n", + " 10 67.6 \n", + " 11 52.2 \n", + " 12 54.8 \n", + " 13 52.4 \n", + " 14 55.8 \n", + " 15 49.2 \n", + " 16 62.7 \n", + " 17 62.1 \n", + " 18 63.7 \n", + " 19 58.8 \n", + " 20 71.3 \n", + " 21 64.9 \n", + " 22 49.6 \n", + " 23 49.4 \n", + " 24 60.4 \n", + " 25 35.7 \n", + " 26 44.3 \n", + " 27 40.0 \n", + " 28 58.0 \n", + " 29 59.3 \n", + " 30 42.7 \n", + " 31 51.9 \n", + " 32 44.3 \n", + " 33 NaN \n", + " 34 NaN ,\n", + " '2021': id_uf nome \\\n", + " 0 0 Brasil \n", + " 1 1 Norte \n", + " 2 2 Nordeste \n", + " 3 3 Sudeste \n", + " 4 4 Sul \n", 
+ " 5 5 Centro-Oeste \n", + " 6 11 Rondônia \n", + " 7 12 Acre \n", + " 8 13 Amazonas \n", + " 9 14 Roraima \n", + " 10 15 Pará \n", + " 11 16 Amapá \n", + " 12 17 Tocantins \n", + " 13 21 Maranhão \n", + " 14 22 Piauí \n", + " 15 23 Ceará \n", + " 16 24 Rio Grande do Norte \n", + " 17 25 Paraíba \n", + " 18 26 Pernambuco \n", + " 19 27 Alagoas \n", + " 20 28 Sergipe \n", + " 21 29 Bahia \n", + " 22 31 Minas Gerais \n", + " 23 32 Espírito Santo \n", + " 24 33 Rio de Janeiro \n", + " 25 35 São Paulo \n", + " 26 41 Paraná \n", + " 27 42 Santa Catarina \n", + " 28 43 Rio Grande do Sul \n", + " 29 50 Mato Grosso do Sul \n", + " 30 51 Mato Grosso \n", + " 31 52 Goiás \n", + " 32 53 Distrito Federal \n", + " 33 NaN NaN \n", + " 34 Notas: Foram consideradas a rede total (escola... NaN \n", + " \n", + " Ensino Fundamental – Anos Iniciais Ensino Fundamental – Anos Finais \\\n", + " 0 36.6 49.5 \n", + " 1 36.8 57.7 \n", + " 2 34.2 56.9 \n", + " 3 36.2 41.1 \n", + " 4 43.1 51.1 \n", + " 5 33.6 43.3 \n", + " 6 23.4 43.6 \n", + " 7 27.9 46.2 \n", + " 8 39.8 56.7 \n", + " 9 20.5 35.8 \n", + " 10 43.4 68.1 \n", + " 11 34.0 58.8 \n", + " 12 27.0 48.7 \n", + " 13 35.3 56.5 \n", + " 14 33.2 56.3 \n", + " 15 14.6 33.3 \n", + " 16 23.6 60.1 \n", + " 17 42.4 65.7 \n", + " 18 36.4 60.8 \n", + " 19 32.2 57.4 \n", + " 20 44.5 72.2 \n", + " 21 42.9 66.6 \n", + " 22 25.0 40.4 \n", + " 23 20.4 47.6 \n", + " 24 49.3 63.4 \n", + " 25 38.8 32.6 \n", + " 26 60.2 46.1 \n", + " 27 14.6 35.1 \n", + " 28 37.6 61.6 \n", + " 29 56.0 63.4 \n", + " 30 21.1 32.3 \n", + " 31 25.2 39.8 \n", + " 32 36.0 48.9 \n", + " 33 NaN NaN \n", + " 34 NaN NaN \n", + " \n", + " Ensino Médio Regular \n", + " 0 48.3 \n", + " 1 57.5 \n", + " 2 57.8 \n", + " 3 41.8 \n", + " 4 44.9 \n", + " 5 46.1 \n", + " 6 48.7 \n", + " 7 44.4 \n", + " 8 55.9 \n", + " 9 43.7 \n", + " 10 66.7 \n", + " 11 54.5 \n", + " 12 52.9 \n", + " 13 50.8 \n", + " 14 55.3 \n", + " 15 44.4 \n", + " 16 59.9 \n", + " 17 61.8 \n", + " 18 61.3 \n", + " 19 
58.9 \n", + " 20 68.7 \n", + " 21 68.6 \n", + " 22 45.1 \n", + " 23 47.3 \n", + " 24 57.8 \n", + " 25 34.9 \n", + " 26 40.6 \n", + " 27 37.1 \n", + " 28 53.2 \n", + " 29 57.9 \n", + " 30 40.7 \n", + " 31 46.5 \n", + " 32 43.6 \n", + " 33 NaN \n", + " 34 NaN }" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: 2007\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2008\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2009\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2010\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2011\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2012\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2013\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2014\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio 
Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2015\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2016\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2017\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2018\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2019\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2020\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n", + "Sheet: 2021\n", + "Index(['id_uf', 'nome', 'Ensino Fundamental – Anos Iniciais',\n", + " 'Ensino Fundamental – Anos Finais', 'Ensino Médio Regular'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = pd.concat(\n", + " [\n", + " df.pipe(\n", + " lambda d: d.loc[(d[\"id_uf\"].astype(str).str.len() == 2)]\n", + " )\n", + " .pipe(\n", + " lambda d: pd.melt(\n", + " d,\n", + " id_vars=[\"id_uf\", \"nome\"],\n", + " value_vars=d.columns.difference([\"id_uf\", 
\"nome\"]).tolist(), # Convert to list\n", + " var_name=\"etapa_ensino\",\n", + " value_name=\"tdi\",\n", + " )\n", + " )\n", + " .assign(ano=ano)\n", + " for ano, df in dfs.items()\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_ufnomeetapa_ensinotdiano
011RondôniaEnsino Fundamental – Anos Finais65.82007
112AcreEnsino Fundamental – Anos Finais60.02007
213AmazonasEnsino Fundamental – Anos Finais75.72007
314RoraimaEnsino Fundamental – Anos Finais69.52007
415ParáEnsino Fundamental – Anos Finais68.42007
..................
7643Rio Grande do SulEnsino Médio Regular53.22021
7750Mato Grosso do SulEnsino Médio Regular57.92021
7851Mato GrossoEnsino Médio Regular40.72021
7952GoiásEnsino Médio Regular46.52021
8053Distrito FederalEnsino Médio Regular43.62021
\n", + "

1215 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " id_uf nome etapa_ensino tdi ano\n", + "0 11 Rondônia Ensino Fundamental – Anos Finais 65.8 2007\n", + "1 12 Acre Ensino Fundamental – Anos Finais 60.0 2007\n", + "2 13 Amazonas Ensino Fundamental – Anos Finais 75.7 2007\n", + "3 14 Roraima Ensino Fundamental – Anos Finais 69.5 2007\n", + "4 15 Pará Ensino Fundamental – Anos Finais 68.4 2007\n", + ".. ... ... ... ... ...\n", + "76 43 Rio Grande do Sul Ensino Médio Regular 53.2 2021\n", + "77 50 Mato Grosso do Sul Ensino Médio Regular 57.9 2021\n", + "78 51 Mato Grosso Ensino Médio Regular 40.7 2021\n", + "79 52 Goiás Ensino Médio Regular 46.5 2021\n", + "80 53 Distrito Federal Ensino Médio Regular 43.6 2021\n", + "\n", + "[1215 rows x 5 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31,\n", + " 32, 33, 35, 41, 42, 43, 50, 51, 52, 53], dtype=object)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe['id_uf'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "bd_dir = pd.read_csv(\n", + " '/home/vilelaluiza/dados/br_inep_sinopse_estatistica_educacao_basica/input/br_bd_diretorios_brasil.uf.csv'\n", + ")\n", + "\n", + "melted_dataframe[\"nome\"] = (\n", + " melted_dataframe[\"nome\"]\n", + " .astype(str)\n", + " .replace(\n", + " {i[\"nome\"]: i[\"sigla\"] for i in bd_dir.to_dict(\"records\")} # type: ignore\n", + " )\n", + ")\n", + "\n", + "melted_dataframe = melted_dataframe.rename(\n", + " columns={\"nome\": \"sigla_uf\"}, errors=\"raise\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + 
"source": [ + "melted_dataframe = melted_dataframe[\n", + " [\n", + " \"ano\",\n", + " \"sigla_uf\",\n", + " \"etapa_ensino\",\n", + " \"tdi\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anosigla_ufetapa_ensinotdi
02007ROEnsino Fundamental – Anos Finais65.8
12007ACEnsino Fundamental – Anos Finais60.0
22007AMEnsino Fundamental – Anos Finais75.7
32007RREnsino Fundamental – Anos Finais69.5
42007PAEnsino Fundamental – Anos Finais68.4
...............
762021RSEnsino Médio Regular53.2
772021MSEnsino Médio Regular57.9
782021MTEnsino Médio Regular40.7
792021GOEnsino Médio Regular46.5
802021DFEnsino Médio Regular43.6
\n", + "

1215 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ano sigla_uf etapa_ensino tdi\n", + "0 2007 RO Ensino Fundamental – Anos Finais 65.8\n", + "1 2007 AC Ensino Fundamental – Anos Finais 60.0\n", + "2 2007 AM Ensino Fundamental – Anos Finais 75.7\n", + "3 2007 RR Ensino Fundamental – Anos Finais 69.5\n", + "4 2007 PA Ensino Fundamental – Anos Finais 68.4\n", + ".. ... ... ... ...\n", + "76 2021 RS Ensino Médio Regular 53.2\n", + "77 2021 MS Ensino Médio Regular 57.9\n", + "78 2021 MT Ensino Médio Regular 40.7\n", + "79 2021 GO Ensino Médio Regular 46.5\n", + "80 2021 DF Ensino Médio Regular 43.6\n", + "\n", + "[1215 rows x 4 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_distorcao_idade_serie\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "melted_dataframe.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb b/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb new file mode 100644 index 00000000..cba31907 --- /dev/null +++ b/models/br_inep_educacao_especial/code/educacao_especial_uf_taxa_rendimento.ipynb @@ -0,0 +1,815 @@ +{ + "cells": [ + { + 
"cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import zipfile\n", + "import pandas as pd\n", + "import basedosdados as bd\n", + "\n", + "INPUT = os.path.join(os.getcwd(), \"input\")\n", + "OUTPUT = os.path.join(os.getcwd(), \"output\")\n", + "\n", + "os.makedirs(INPUT, exist_ok=True)\n", + "os.makedirs(OUTPUT, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "def read_sheet(sheet_name: str, skiprows: int = 8) -> pd.DataFrame:\n", + " return pd.read_excel(\n", + " os.path.join(\n", + " INPUT,\n", + " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n", + " ),\n", + " skiprows=skiprows,\n", + " sheet_name=sheet_name\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "excel_data = pd.ExcelFile(os.path.join(\n", + " INPUT,\n", + " \"tx_rend_brasil_regioes_ufs_esp.xlsx\"\n", + " ))\n", + "\n", + "# Get the sheet names\n", + "sheet_names = excel_data.sheet_names" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = {\n", + " sheet_name: read_sheet(sheet_name)\n", + " for sheet_name in sheet_names\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BRASIL_REGIOES_UFS ': NU_ANO_CENSO UNIDGEO \\\n", + " 0 2007 Brasil \n", + " 1 2007 Norte \n", + " 2 2007 Nordeste \n", + " 3 2007 Sudeste \n", + " 4 2007 Sul \n", + " .. ... ... \n", + " 492 2021 Mato Grosso \n", + " 493 2021 Goiás \n", + " 494 2021 Distrito Federal \n", + " 495 NaN NaN \n", + " 496 Fonte: Censo da Educação Básica 2021/INEP. 
NaN \n", + " \n", + " NO_CATEGORIA NO_DEPENDENCIA 1_CAT_FUN 1_CAT_FUN_AI 1_CAT_FUN_AF \\\n", + " 0 Total Total 73.1 71.6 78.7 \n", + " 1 Total Total 63.2 61.4 75.5 \n", + " 2 Total Total 67.0 65.7 73.0 \n", + " 3 Total Total 77.0 75.6 81.5 \n", + " 4 Total Total 76.8 76.4 78.1 \n", + " .. ... ... ... ... ... \n", + " 492 Total Total 96.3 96.2 96.4 \n", + " 493 Total Total 96.5 95.2 97.6 \n", + " 494 Total Total 89.1 83.1 95.7 \n", + " 495 NaN NaN NaN NaN NaN \n", + " 496 NaN NaN NaN NaN NaN \n", + " \n", + " 1_CAT_MED 2_CAT_FUN 2_CAT_FUN_AI 2_CAT_FUN_AF 2_CAT_MED 3_CAT_FUN \\\n", + " 0 79.6 22.1 23.8 16.2 13.0 4.8 \n", + " 1 77.5 28.6 30.4 16.6 11.1 8.2 \n", + " 2 77.0 24.7 26.2 17.9 11.3 8.3 \n", + " 3 79.6 19.9 21.4 14.7 14.9 3.1 \n", + " 4 81.4 20.4 21.0 18.3 11.5 2.8 \n", + " .. ... ... ... ... ... ... \n", + " 492 81.9 3.1 3.4 2.7 13.4 0.6 \n", + " 493 97.8 2.8 4.2 1.6 1.3 0.7 \n", + " 494 92.2 10.6 16.4 4.1 7.3 0.3 \n", + " 495 NaN NaN NaN NaN NaN NaN \n", + " 496 NaN NaN NaN NaN NaN NaN \n", + " \n", + " 3_CAT_FUN_AI 3_CAT_FUN_AF 3_CAT_MED \n", + " 0 4.6 5.1 7.4 \n", + " 1 8.2 7.9 11.4 \n", + " 2 8.1 9.1 11.7 \n", + " 3 3.0 3.8 5.5 \n", + " 4 2.6 3.6 7.1 \n", + " .. ... ... ... 
\n", + " 492 0.4 0.9 4.7 \n", + " 493 0.6 0.8 0.9 \n", + " 494 0.5 0.2 0.5 \n", + " 495 NaN NaN NaN \n", + " 496 NaN NaN NaN \n", + " \n", + " [497 rows x 16 columns]}" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: BRASIL_REGIOES_UFS \n", + "Index(['NU_ANO_CENSO', 'UNIDGEO', 'NO_CATEGORIA', 'NO_DEPENDENCIA',\n", + " '1_CAT_FUN', '1_CAT_FUN_AI', '1_CAT_FUN_AF', '1_CAT_MED', '2_CAT_FUN',\n", + " '2_CAT_FUN_AI', '2_CAT_FUN_AF', '2_CAT_MED', '3_CAT_FUN',\n", + " '3_CAT_FUN_AI', '3_CAT_FUN_AF', '3_CAT_MED'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability\n" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS = {\n", + " 'NU_ANO_CENSO':'ano', \n", + " 'UNIDGEO':'nome_uf',\n", + " '1_CAT_FUN_AI':'taxaaprovacao_anosiniciais', \n", + " '1_CAT_FUN_AF':'taxaaprovacao_anosfinais', \n", + " '1_CAT_MED':'taxaaprovacao_ensinomedio', \n", + " '2_CAT_FUN_AI':'taxareprovacao_anosiniciais', \n", + " '2_CAT_FUN_AF':'taxareprovacao_anosfinais', \n", + " '2_CAT_MED':'taxareprovacao_ensinomedio', \n", + " '3_CAT_FUN_AI':'taxaabandono_anosiniciais', \n", + " '3_CAT_FUN_AF':'taxaabandono_anosfinais', \n", + " '3_CAT_MED' : 'taxaabandono_ensinomedio' \n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame:\n", + " cols_drop = [\n", + " col\n", + " for col in df.columns\n", + " if col.startswith(\"NO_\") \n", + " or col.startswith(\"1_\") 
\n", + " or col.startswith(\"2_\") \n", + " or col.startswith(\"3_\")\n", + " ]\n", + "\n", + " return df.drop(columns=cols_drop)\n", + "\n", + "dfs = {\n", + " name: drop_unused_columns(\n", + " df.rename(columns=RENAME_COLUMNS, errors=\"raise\")\n", + " )\n", + " for name, df in dfs.items()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'BRASIL_REGIOES_UFS ': ano nome_uf \\\n", + " 0 2007 Brasil \n", + " 1 2007 Norte \n", + " 2 2007 Nordeste \n", + " 3 2007 Sudeste \n", + " 4 2007 Sul \n", + " .. ... ... \n", + " 492 2021 Mato Grosso \n", + " 493 2021 Goiás \n", + " 494 2021 Distrito Federal \n", + " 495 NaN NaN \n", + " 496 Fonte: Censo da Educação Básica 2021/INEP. NaN \n", + " \n", + " taxaaprovacao_anosiniciais taxaaprovacao_anosfinais \\\n", + " 0 71.6 78.7 \n", + " 1 61.4 75.5 \n", + " 2 65.7 73.0 \n", + " 3 75.6 81.5 \n", + " 4 76.4 78.1 \n", + " .. ... ... \n", + " 492 96.2 96.4 \n", + " 493 95.2 97.6 \n", + " 494 83.1 95.7 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaaprovacao_ensinomedio taxareprovacao_anosiniciais \\\n", + " 0 79.6 23.8 \n", + " 1 77.5 30.4 \n", + " 2 77.0 26.2 \n", + " 3 79.6 21.4 \n", + " 4 81.4 21.0 \n", + " .. ... ... \n", + " 492 81.9 3.4 \n", + " 493 97.8 4.2 \n", + " 494 92.2 16.4 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxareprovacao_anosfinais taxareprovacao_ensinomedio \\\n", + " 0 16.2 13.0 \n", + " 1 16.6 11.1 \n", + " 2 17.9 11.3 \n", + " 3 14.7 14.9 \n", + " 4 18.3 11.5 \n", + " .. ... ... \n", + " 492 2.7 13.4 \n", + " 493 1.6 1.3 \n", + " 494 4.1 7.3 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaabandono_anosiniciais taxaabandono_anosfinais \\\n", + " 0 4.6 5.1 \n", + " 1 8.2 7.9 \n", + " 2 8.1 9.1 \n", + " 3 3.0 3.8 \n", + " 4 2.6 3.6 \n", + " .. ... ... 
\n", + " 492 0.4 0.9 \n", + " 493 0.6 0.8 \n", + " 494 0.5 0.2 \n", + " 495 NaN NaN \n", + " 496 NaN NaN \n", + " \n", + " taxaabandono_ensinomedio \n", + " 0 7.4 \n", + " 1 11.4 \n", + " 2 11.7 \n", + " 3 5.5 \n", + " 4 7.1 \n", + " .. ... \n", + " 492 4.7 \n", + " 493 0.9 \n", + " 494 0.5 \n", + " 495 NaN \n", + " 496 NaN \n", + " \n", + " [497 rows x 11 columns]}" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfs" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sheet: BRASIL_REGIOES_UFS \n", + "Index(['ano', 'nome_uf', 'taxaaprovacao_anosiniciais',\n", + " 'taxaaprovacao_anosfinais', 'taxaaprovacao_ensinomedio',\n", + " 'taxareprovacao_anosiniciais', 'taxareprovacao_anosfinais',\n", + " 'taxareprovacao_ensinomedio', 'taxaabandono_anosiniciais',\n", + " 'taxaabandono_anosfinais', 'taxaabandono_ensinomedio'],\n", + " dtype='object')\n", + "\n" + ] + } + ], + "source": [ + "for sheet_name, df in dfs.items():\n", + " print(f\"Sheet: {sheet_name}\")\n", + " print(df.columns) # This will print the column names of each DataFrame\n", + " print() # Adds a blank line for readability" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe = pd.concat(\n", + " [\n", + " df.pipe(\n", + " lambda d: d.loc[(d[\"nome_uf\"].notna()) & (d[\"nome_uf\"] != \" \")]\n", + " )\n", + " .pipe(\n", + " lambda d: pd.melt(\n", + " d,\n", + " id_vars=[\"ano\", \"nome_uf\"],\n", + " value_vars=d.columns.difference([\"ano\", \"nome_uf\"]).tolist(), # Convert to list\n", + " var_name=\"metrica\",\n", + " value_name=\"valor\",\n", + " )\n", + " )\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anonome_ufmetricavalor
02007Brasiltaxaabandono_anosfinais5.1
12007Nortetaxaabandono_anosfinais7.9
22007Nordestetaxaabandono_anosfinais9.1
32007Sudestetaxaabandono_anosfinais3.8
42007Sultaxaabandono_anosfinais3.6
...............
44502021Rio Grande do Sultaxareprovacao_ensinomedio1.5
44512021Mato Grosso do Sultaxareprovacao_ensinomedio8.4
44522021Mato Grossotaxareprovacao_ensinomedio13.4
44532021Goiástaxareprovacao_ensinomedio1.3
44542021Distrito Federaltaxareprovacao_ensinomedio7.3
\n", + "

4455 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ano nome_uf metrica valor\n", + "0 2007 Brasil taxaabandono_anosfinais 5.1\n", + "1 2007 Norte taxaabandono_anosfinais 7.9\n", + "2 2007 Nordeste taxaabandono_anosfinais 9.1\n", + "3 2007 Sudeste taxaabandono_anosfinais 3.8\n", + "4 2007 Sul taxaabandono_anosfinais 3.6\n", + "... ... ... ... ...\n", + "4450 2021 Rio Grande do Sul taxareprovacao_ensinomedio 1.5\n", + "4451 2021 Mato Grosso do Sul taxareprovacao_ensinomedio 8.4\n", + "4452 2021 Mato Grosso taxareprovacao_ensinomedio 13.4\n", + "4453 2021 Goiás taxareprovacao_ensinomedio 1.3\n", + "4454 2021 Distrito Federal taxareprovacao_ensinomedio 7.3\n", + "\n", + "[4455 rows x 4 columns]" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "melted_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "melted_dataframe['etapa_ensino'] = melted_dataframe['metrica'].apply(\n", + " lambda v: v.split('_')[-1]) # Extracts 'anosiniciais', 'anosfinais', or 'ensinomedio'\n", + "melted_dataframe['tipo_metrica'] = melted_dataframe['metrica'].apply(\n", + " lambda v: v.split('_')[0]) # Extracts 'taxaaprovacao', 'taxareprovacao', 'taxaabandono'\n", + "\n", + "# Pivoting the melted DataFrame to get desired columns\n", + "df_final = melted_dataframe.pivot_table(index=['ano', 'nome_uf', 'etapa_ensino'], \n", + " columns='tipo_metrica', \n", + " values='valor').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "RENAME_COLUMNS_MELTED = {\n", + " 'taxaabandono':'taxa_abandono', \n", + " 'taxaaprovacao':'taxa_aprovacao',\n", + " 'taxareprovacao':'taxa_reprovacao' \n", + "}\n", + "\n", + "etapa_ensino = {\n", + " 'anosiniciais': 'Ensino Fundamental – Anos Iniciais',\n", + " 'anosfinais':'Ensino Fundamental – Anos Finais',\n", + " 'ensinomedio':'Ensino Médio Regular'\n", + "}" + ] + }, + { + "cell_type": 
"code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tipo_metricaanonome_ufetapa_ensinotaxaabandonotaxaaprovacaotaxareprovacao
02007Acreanosfinais0.091.98.1
12007Acreanosiniciais4.870.524.7
22007Acreensinomedio6.184.89.1
32007Alagoasanosfinais16.364.818.9
42007Alagoasanosiniciais9.062.228.8
.....................
14802021São Pauloanosiniciais0.569.829.7
14812021São Pauloensinomedio3.694.32.1
14822021Tocantinsanosfinais1.296.22.6
14832021Tocantinsanosiniciais0.488.411.2
14842021Tocantinsensinomedio2.595.22.3
\n", + "

1485 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + "tipo_metrica ano nome_uf etapa_ensino taxaabandono taxaaprovacao \\\n", + "0 2007 Acre anosfinais 0.0 91.9 \n", + "1 2007 Acre anosiniciais 4.8 70.5 \n", + "2 2007 Acre ensinomedio 6.1 84.8 \n", + "3 2007 Alagoas anosfinais 16.3 64.8 \n", + "4 2007 Alagoas anosiniciais 9.0 62.2 \n", + "... ... ... ... ... ... \n", + "1480 2021 São Paulo anosiniciais 0.5 69.8 \n", + "1481 2021 São Paulo ensinomedio 3.6 94.3 \n", + "1482 2021 Tocantins anosfinais 1.2 96.2 \n", + "1483 2021 Tocantins anosiniciais 0.4 88.4 \n", + "1484 2021 Tocantins ensinomedio 2.5 95.2 \n", + "\n", + "tipo_metrica taxareprovacao \n", + "0 8.1 \n", + "1 24.7 \n", + "2 9.1 \n", + "3 18.9 \n", + "4 28.8 \n", + "... ... \n", + "1480 29.7 \n", + "1481 2.1 \n", + "1482 2.6 \n", + "1483 11.2 \n", + "1484 2.3 \n", + "\n", + "[1485 rows x 6 columns]" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "df_final = df_final.rename(columns=RENAME_COLUMNS_MELTED)\n", + "df_final['etapa_ensino'] = df_final['etapa_ensino'].replace(etapa_ensino)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "bd_dir = pd.read_csv(\n", + " '/home/vilelaluiza/dados/br_inep_sinopse_estatistica_educacao_basica/input/br_bd_diretorios_brasil.uf.csv'\n", + ")\n", + "\n", + "# Perform an inner merge based on 'nome_uf' and 'nome'\n", + "df_final = pd.merge(df_final, bd_dir[['nome', 'sigla']], how='inner', left_on='nome_uf', right_on='nome')\n", + "\n", + "# Rename the 'sigla' column to 'sigla_uf' and drop the 'nome' column\n", + "df_final = df_final.rename(columns={'sigla': 'sigla_uf'}).drop(columns=['nome_uf','nome'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['AC', 'AL', 
'AP', 'AM', 'BA', 'CE', 'DF', 'ES', 'GO', 'MA', 'MT',\n", + " 'MS', 'MG', 'PR', 'PB', 'PA', 'PE', 'PI', 'RN', 'RS', 'RJ', 'RO',\n", + " 'RR', 'SC', 'SE', 'SP', 'TO'], dtype=object)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_final['sigla_uf'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "df_final = df_final[['ano', 'sigla_uf', 'etapa_ensino', 'taxa_aprovacao','taxa_reprovacao','taxa_abandono']]" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join(\n", + " OUTPUT, \"educacao_especial_taxa_rendimento\"\n", + " )\n", + "\n", + "os.makedirs(path, exist_ok=True)\n", + "df_final.astype(str).to_csv(os.path.join(path, \"data.csv\"), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_inep_educacao_especial/schema.yml b/models/br_inep_educacao_especial/schema.yml new file mode 100644 index 00000000..cb02eedd --- /dev/null +++ b/models/br_inep_educacao_especial/schema.yml @@ -0,0 +1,391 @@ +--- +version: 2 +models: + - name: br_inep_educacao_especial__etapa_ensino + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe (Comuns ou Exclusivas) e Etapa de Ensino + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - 
relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes + Exclusivas/Especiais) + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__faixa_etaria + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe (Comuns ou Exclusivas) e faixa etária + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. 
Classes Comuns ou Classes + Exclusivas/Especiais) + - name: faixa_etaria + description: Faixa etária + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__localizacao + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe, rede e localização + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes + Exclusivas/Especiais) + - name: rede + description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada) + - name: localizacao + description: Localização (e.g. Zona Urbana, Zona Rural) + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__sexo_raca_cor + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe, sexo e raça/cor + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. 
Classes Comuns ou Classes + Exclusivas/Especiais) + - name: sexo + description: Sexo + - name: raca_cor + description: Raça/Cor + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__tempo_ensino + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe, rede e tempo de ensino (Integral ou Parcial) + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes + Exclusivas/Especiais) + - name: rede + description: Rede Escolar (e.g. 
Federal, Estadual, Municipal e Privada) + - name: tempo_ensino + description: Classificação em tempo integral e tempo parcial + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__tipo_deficiencia + description: Número de alunos matriculados na Educação Especial ao longo do tempo + por tipo de classe e tipo de deficiência, transtorno global do desenvolvimento + ou altas habilidades/superdotação + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: tipo_classe + description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes + Exclusivas/Especiais) + - name: tipo_deficiencia + description: Tipo de deficiência, transtorno global do desenvolvimento ou + altas habilidades/superdotação + - name: quantidade_matricula + description: Número de matrículas + - name: br_inep_educacao_especial__uf_distorcao_idade_serie + description: 'Taxa de distorção idade-série da Educação Especial por UF. Dados + obtidos via Pedido LAI (nº protocolo: 23546066150202238)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. 
Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) + - name: tdi + description: Taxa de distorção idade-série + - name: br_inep_educacao_especial__brasil_distorcao_idade_serie + description: 'Taxa de distorção idade-série da Educação Especial no Brasil. Dados + obtidos via Pedido LAI (nº protocolo: 23546066150202238)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) + - name: tdi + description: Taxa de distorção idade-série + - name: br_inep_educacao_especial__uf_taxa_rendimento + description: 'Taxas de rendimento (aprovação, reprovação e abandono) da Educação + Especial por UF. Dados obtidos via Pedido LAI (nº protocolo: 23546054413202266)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) + - name: taxa_aprovacao + description: Taxa de aprovação + - name: taxa_reprovacao + description: Taxa de reprovação + - name: taxa_abandono + description: Taxa de abandono + - name: br_inep_educacao_especial__brasil_taxa_rendimento + description: 'Taxas de rendimento (aprovação, reprovação e abandono) da Educação + Especial no Brasil. 
Dados obtidos via Pedido LAI (nº protocolo: 23546054413202266)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) + - name: taxa_aprovacao + description: Taxa de aprovação + - name: taxa_reprovacao + description: Taxa de reprovação + - name: taxa_abandono + description: Taxa de abandono + - name: br_inep_educacao_especial__docente_aee + description: 'Quantidade de docentes regentes e do Atendimento Educacional Especializado + (AEE) no total e com Formação Continuada em Educação Especial. Dados obtidos + via Pedido LAI (nº protocolo: 23546035869202316)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: quantidade_docente_regente + description: Número de professores regentes + - name: quantidade_docente_aee + description: Número de professores do Atendimento Educacional Especializado + (AEE) + - name: quantidade_docente_regente_formacao_continuada + description: Número de professores regentes com formação continuada sobre + Educação Especial + - name: quantidade_docente_aee_formacao_continuada + description: Número de professores do Atendimento Educacional Especializado + (AEE) com formação continuada sobre Educação Especial + - name: br_inep_educacao_especial__docente_formacao + description: 
'Quantidade de docentes da Educação Básica Formação Continuada em + Educação Especial por rede de ensino. Dados obtidos via Pedido LAI (nº protocolo: + 23546049990202406)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: rede + description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada) + - name: quantidade_docente_formacao_continuada + description: Quantidade de docentes da Educação Básica com formação continuada + em Educação Especial + - name: br_inep_educacao_especial__matricula_aee + description: 'Quantidade de matrículas no Atendimento Educacional Especializado + (AEE) por unidade da federação e rede de ensino. Dados obtidos via Pedido LAI + (nº protocolo: 23546086048202330)' + tests: + - not_null_proportion_multiple_columns: + at_least: 0.05 + columns: + - name: ano + description: Ano + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__ano') + field: ano.ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: rede + description: Rede Escolar (e.g. 
Federal, Estadual, Municipal e Privada) + - name: quantidade_matricula + description: Número de matrículas na Educação Especial + - name: quantidade_matricula_aee + description: Número de matrículas no Atendimento Educacional Especializado + (AEE) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql new file mode 100644 index 00000000..384fa0e2 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia.sql @@ -0,0 +1,23 @@ +{{ + config( + alias="docente_deficiencia", + schema="br_inep_sinopse_estatistica_educacao_basica", + materialized="table", + partition_by={ + "field": "ano", + "data_type": "int64", + "range": {"start": 2012, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + safe_cast(tipo_classe as string) tipo_classe, + safe_cast(deficiencia as string) deficiencia, + safe_cast(quantidade_docente as int64) quantidade_docente, +from + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_deficiencia` + as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql new file mode 100644 index 00000000..77a3b180 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade.sql @@ -0,0 +1,23 @@ +{{ + config( + alias="docente_escolaridade", + schema="br_inep_sinopse_estatistica_educacao_basica", + materialized="table", + partition_by={ + "field": 
"ano", + "data_type": "int64", + "range": {"start": 2007, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(id_municipio as string) id_municipio, + safe_cast(tipo_classe as string) tipo_classe, + safe_cast(escolaridade as string) escolaridade, + safe_cast(replace(quantidade_docente, ".0", "") as int64) quantidade_docente, +from + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_escolaridade` + as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql new file mode 100644 index 00000000..73424c9e --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino.sql @@ -0,0 +1,24 @@ +{{ + config( + alias="docente_etapa_ensino", + schema="br_inep_sinopse_estatistica_educacao_basica", + materialized="table", + partition_by={ + "field": "ano", + "data_type": "int64", + "range": {"start": 2007, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} + +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(replace(id_municipio, ".0", "") as string) id_municipio, + safe_cast(tipo_classe as string) tipo_classe, + safe_cast(etapa_ensino as string) etapa_ensino, + safe_cast(replace(quantidade_docentes, ".0", "") as int64) quantidade_docente, +from + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_etapa_ensino` + as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql 
similarity index 77% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql rename to models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql index a101d605..bd7e3636 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria.sql +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo.sql @@ -1,6 +1,6 @@ {{ config( - alias="educacao_especial_faixa_etaria", + alias="docente_faixa_etaria_sexo", schema="br_inep_sinopse_estatistica_educacao_basica", materialized="table", partition_by={ @@ -17,7 +17,8 @@ select safe_cast(id_municipio as string) id_municipio, safe_cast(tipo_classe as string) tipo_classe, safe_cast(faixa_etaria as string) faixa_etaria, - safe_cast(quantidade_matricula as numeric) quantidade_matricula, + safe_cast(sexo as string) sexo, + safe_cast(quantidade_docente as int64) quantidade_docente, from - `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_faixa_etaria` + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_faixa_etaria_sexo` as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql new file mode 100644 index 00000000..a58cb833 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_localizacao.sql @@ -0,0 +1,24 @@ +{{ + config( + alias="docente_localizacao", + schema="br_inep_sinopse_estatistica_educacao_basica", + materialized="table", + partition_by={ + "field": "ano", + "data_type": "int64", + 
"range": {"start": 2007, "end": 2023, "interval": 1}, + }, + cluster_by="sigla_uf", + ) +}} +select + safe_cast(ano as int64) ano, + safe_cast(sigla_uf as string) sigla_uf, + safe_cast(replace(id_municipio, ".0", "") as string) id_municipio, + safe_cast(tipo_classe as string) tipo_classe, + safe_cast(rede as string) rede, + safe_cast(localizacao as string) localizacao, + safe_cast(quantidade_docente as int64) quantidade_docente, +from + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_localizacao` + as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql similarity index 74% rename from models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql rename to models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql index b5619396..74c2d810 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao.sql +++ b/models/br_inep_sinopse_estatistica_educacao_basica/br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato.sql @@ -1,6 +1,6 @@ {{ config( - alias="educacao_especial_localizacao", + alias="docente_regime_contrato", schema="br_inep_sinopse_estatistica_educacao_basica", materialized="table", partition_by={ @@ -17,8 +17,8 @@ select safe_cast(id_municipio as string) id_municipio, safe_cast(tipo_classe as string) tipo_classe, safe_cast(rede as string) rede, - safe_cast(localizacao as string) localizacao, - safe_cast(quantidade_matricula as numeric) quantidade_matricula, + safe_cast(regime_contrato as string) regime_contrato, + safe_cast(quantidade_docente as int64) quantidade_docente, from - 
`basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.educacao_especial_localizacao` + `basedosdados-staging.br_inep_sinopse_estatistica_educacao_basica_staging.docente_regime_contrato` as t diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py new file mode 100644 index 00000000..4473f626 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_deficiencia.py @@ -0,0 +1,172 @@ +import os +import zipfile +import pandas as pd +import basedosdados as bd +import numpy as np + +INPUT = os.path.join(os.getcwd(), "input") +OUTPUT = os.path.join(os.getcwd(), "output") + +# os.makedirs(INPUT, exist_ok=True) +# os.makedirs(OUTPUT, exist_ok=True) + +RENAME_DEFICIENCIA = { + "Educacao Especial - Classes Comuns": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Cegueira": "Cegueira", + "Baixa Visão": "Baixa Visão", + "Surdez": "Surdez", + "Deficiência Auditiva": "Deficiência Auditiva", + "Surdocegueira": "Surdocegueira", + "Deficiência Física": "Deficiência Física", + "Deficiência Intelectual": "Deficiência Intelectual", + "Deficiência Múltipla": "Deficiência Múltipla", + # "Transtorno do Espectro Autista": "Transtorno do Espectro Autista", + # "Altas Habilidades / Superdotação": "Altas Habilidades / Superdotação", + }, + "Educacao Especial - Classes Exclusivas": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Cegueira": "Cegueira", + "Baixa Visão": "Baixa Visão", + "Surdez": "Surdez", + "Deficiência Auditiva": "Deficiência Auditiva", + "Surdocegueira": "Surdocegueira", + "Deficiência Física": "Deficiência Física", + "Deficiência Intelectual": "Deficiência Intelectual", + "Deficiência Múltipla": "Deficiência Múltipla", + # "Transtorno do Espectro Autista": "Transtorno do Espectro Autista", + # "Altas Habilidades / Superdotação": "Altas Habilidades / Superdotação", + }, 
+} + +deficiencia = { + "educacao_especial_classes_comuns": { + "dicionario": RENAME_DEFICIENCIA["Educacao Especial - Classes Comuns"], + "chave": "2.48", + "valor": "Educacao Especial - Classes Comuns", + "skiprows": 7, + "table": "docente_deficiencia", + }, + "educacao_especial_classes_exclusivas": { + "dicionario": RENAME_DEFICIENCIA["Educacao Especial - Classes Exclusivas"], + "chave": "2.54", + "valor": "Educacao Especial - Classes Exclusivas", + "skiprows": 7, + "table": "docente_deficiencia", + }, +} + + +def read_sheet( + table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows +) -> pd.DataFrame: + print("Tratando dados de", valor, ano) + path_excel = os.path.join( + INPUT, + f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}", + f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx", + ) + + df = pd.read_excel( + path_excel, + skiprows=skiprows, + sheet_name=chave, + ) + + sheets_etapa_ensino_serie = {chave: valor} + + dfs_deficiencia = { + name: pd.read_excel(path_excel, skiprows=skiprows, sheet_name=sheet_name) + for sheet_name, name in sheets_etapa_ensino_serie.items() + } + + dataframes = {} + + for table_name, columns in dfs_deficiencia.items(): + df = pd.DataFrame(columns) # Create DataFrame for each table + dataframes[table_name] = df # Store the DataFrame in a dictionary + + print(df.columns) + + def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame: + cols_drop = [ + col + for col in df.columns + if col.startswith("Unnamed") or col.startswith("Total") + ] + + return df.drop(columns=cols_drop) + + dfs_deficiencia = { + name: drop_unused_columns( + df.rename(columns=RENAME_DEFICIENCIA[name], errors="raise") + ) + for name, df in dfs_deficiencia.items() + } + + df_deficiencia = pd.concat( + [ + df.pipe( + lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),] + ) + .pipe( + lambda d: pd.melt( + d, + id_vars=["id_municipio", "uf"], + value_vars=d.columns.difference( + ["id_municipio", "uf"] + ).tolist(), # Convert to 
list + var_name="deficiencia", + value_name="quantidade_docente", + ) + ) + .assign(tipo_classe=tipo_classe) + for tipo_classe, df in dfs_deficiencia.items() + ] + ) + + bd_dir = bd.read_sql( + "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`", + billing_project_id="basedosdados", + reauth=False, + ) + + df_deficiencia["uf"] = df_deficiencia["uf"].apply(lambda uf: uf.strip()).replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore + + df_deficiencia = df_deficiencia.rename(columns={"uf": "sigla_uf"}, errors="raise") + + df_deficiencia["quantidade_docente"] = df_deficiencia["quantidade_docente"].astype( + int + ) + + print("Particionando dados") + for sigla_uf, df in df_deficiencia.groupby("sigla_uf"): + path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}") + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="w" + ) + else: + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="a", header=False + ) + +if __name__ == '__main__' : + lista = [ + "educacao_especial_classes_comuns", + "educacao_especial_classes_exclusivas", + ] + + for x in lista: + # for ano in range(2012, 2019): + read_sheet( + table=deficiencia[x]["table"], + ano=2011, + chave=deficiencia[x]["chave"], + valor=deficiencia[x]["valor"], + dicionario=deficiencia[x]["dicionario"], + skiprows=deficiencia[x]["skiprows"], + ) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py new file mode 100644 index 00000000..179dadf4 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_escolaridade.py @@ -0,0 +1,347 @@ +import os +import zipfile +import pandas as pd +import basedosdados as bd +import numpy as np + + +INPUT = 
os.path.join(os.getcwd(), "input") +OUTPUT = os.path.join(os.getcwd(), "output") + +# os.makedirs(INPUT, exist_ok=True) +# os.makedirs(OUTPUT, exist_ok=True) + +RENAMES_ETAPA_ENSINO_SERIE = { + "Educacao Basica": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura10": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Educacao Infantil - Creche": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura10": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Educacao Infantil - Pré-Escola": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura10": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Ensino Fundamental": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Ensino Fundamental - Anos Iniciais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + 
"Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Ensino Fundamental - Anos Finais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Ensino Médio": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Educacao Profissional": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "EJA": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Educacao Especial - Classes Comuns": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + 
"Unnamed: 6": "Ensino Médio", + "Com Licenciatura9": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, + "Educacao Especial - Classes Exclusivas": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Unnamed: 5": "Ensino Fundamental", + "Unnamed: 6": "Ensino Médio", + "Com Licenciatura8": "Graduação - Com Licenciatura", + "Sem Licenciatura": "Graduação - Sem Licenciatura", + "Especialização": "Pós Graduação - Especialização", + "Mestrado": "Pós Graduação - Mestrado", + "Doutorado": "Pós Graduação - Doutorado", + }, +} + + +escolaridade = { + "educacao_basica": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"], + "chave": "2.4", + "valor": "Educacao Basica", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "ensino_infantil_creche": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"], + # "chave": "2.10", + "chave": "2.9", # Para anos anteriores a 2010 + "valor": "Educacao Infantil - Creche", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "educacao_infantil_pre_escola": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"], + # "chave": "2.14", + "chave": "2.12", # Para anos anteriores a 2010 + "valor": "Educacao Infantil - Pré-Escola", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "ensino_fundamental": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"], + "chave": "2.19", + "chave": "2.16", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "ensino_fundamental_anos_iniciais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"], + # "chave": "2.23", + "chave": "2.19", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental - Anos Iniciais", + "skiprows": 9, + "table": 
"docente_escolaridade", + }, + "ensino_fundamental_anos_finais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"], + # "chave": "2.27", + "chave": "2.22", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental - Anos Finais", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "ensino_medio": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"], + # "chave": "2.31", + "chave": "2.25", # Para anos anteriores a 2010 + "valor": "Ensino Médio", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "educacao_profissional": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"], + # "chave": "2.36", + "chave": "2.29", # Para anos anteriores a 2010 + "valor": "Educacao Profissional", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "EJA": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"], + # "chave": "2.41", + "chave": "2.33", # Para anos anteriores a 2010 + "valor": "EJA", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "educacao_especial_classes_comuns": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - Classes Comuns"], + # "chave": "2.47", + "chave": "2.38", # Para anos anteriores a 2010 + "valor": "Educacao Especial - Classes Comuns", + "skiprows": 9, + "table": "docente_escolaridade", + }, + "educacao_especial_classes_exclusivas": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE[ + "Educacao Especial - Classes Exclusivas" + ], + # "chave": "2.53", + "chave": "2.52", # Para o ano de 2011 + "chave": "2.42", # Para anos anteriores a 2010 + "valor": "Educacao Especial - Classes Exclusivas", + "skiprows": 9, + "table": "docente_escolaridade", + }, +} + + +def read_sheet( + table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9 +) -> pd.DataFrame: + print("Tratando dados de", valor, ano) + path_excel = os.path.join( + INPUT, + f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}", + f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx", + ) + df 
= pd.read_excel( + path_excel, + skiprows=skiprows, + sheet_name=chave, + ) + + sheets_escolaridade = {chave: valor} + + dfs_escolaridade = { + name: pd.read_excel( + path_excel, + skiprows=skiprows, + sheet_name=sheet_name, + ) + for sheet_name, name in sheets_escolaridade.items() + } + + dataframes = {} + for table_name, columns in dfs_escolaridade.items(): + df = pd.DataFrame(columns) + dataframes[table_name] = df + + print(df.columns) + + def drop_unused_columns(df: pd.DataFrame) -> pd.DataFrame: + cols_drop = [ + col + for col in df.columns + if col.startswith("Unnamed") or col.startswith("Total") + ] + + return df.drop(columns=cols_drop) + + dfs_escolaridade = { + name: drop_unused_columns(df.rename(columns=dicionario, errors="raise")) + for name, df in dfs_escolaridade.items() + } + + df_escolaridade = pd.concat( + [ + df.pipe( + lambda d: d.loc[(d["id_municipio"].notna()) & (d["id_municipio"] != " "),] + ) + .pipe( + lambda d: pd.melt( + d, + id_vars=["id_municipio", "uf"], + value_vars=d.columns.difference( + ["id_municipio", "uf"] + ).tolist(), # Convert to list + var_name="escolaridade", + value_name="quantidade_docente", + ) + ) + .assign(tipo_classe=tipo_classe) + for tipo_classe, df in dfs_escolaridade.items() + ] + ) + + bd_dir = bd.read_sql( + "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`", + billing_project_id="basedosdados", + reauth=False, + ) + + df_escolaridade["uf"] = df_escolaridade["uf"].apply(lambda uf: uf.strip()).replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore + + df_escolaridade = df_escolaridade.rename(columns={"uf": "sigla_uf"}, errors="raise") + + print("Particionando dados") + for sigla_uf, df in df_escolaridade.groupby("sigla_uf"): + path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}") + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="w" + ) + 
else: + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="a", header=False + ) + +if __name__ == '__main__' : + lista = [ + "educacao_basica", + "ensino_infantil_creche", + "educacao_infantil_pre_escola", + "ensino_fundamental", + "ensino_fundamental_anos_iniciais", + "ensino_fundamental_anos_finais", + "ensino_medio", + "educacao_profissional", + "EJA", + "educacao_especial_classes_comuns", + "educacao_especial_classes_exclusivas", + ] + + for x in lista: + read_sheet( + table=escolaridade[x]["table"], + ano=2007, + chave=escolaridade[x]["chave"], + valor=escolaridade[x]["valor"], + dicionario=escolaridade[x]["dicionario"], + skiprows=escolaridade[x]["skiprows"], + ) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py new file mode 100644 index 00000000..fb16a601 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_faixa_etaria_sexo.py @@ -0,0 +1,444 @@ +import os +import zipfile +import pandas as pd +import basedosdados as bd +import numpy as np + + +INPUT = os.path.join(os.getcwd(), "input") +OUTPUT = os.path.join(os.getcwd(), "output") + +# os.makedirs(INPUT, exist_ok=True) +# os.makedirs(OUTPUT, exist_ok=True) + +RENAMES_ETAPA_ENSINO_SERIE = { + "Educacao Basica": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 
anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Educacao Infantil - Creche": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Educacao Infantil - Pré-Escola": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Ensino Fundamental": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 
anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Ensino Fundamental - Anos Iniciais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Ensino Fundamental - Anos Finais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Ensino Médio": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + 
"Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Educacao Profissional": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "EJA": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 
54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Educacao Especial - Classes Comuns": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, + "Educacao Especial - Classes Exclusivas": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Até 24 anos": "Feminino_Até 24 anos", + "De 25 a 29 anos": "Feminino_25 a 29 anos", + "De 30 a 39 anos": "Feminino_30 a 39 anos", + "De 40 a 49 anos": "Feminino_40 a 49 anos", + "De 50 a 54 anos": "Feminino_50 a 54 anos", + "De 55 a 59 anos": "Feminino_55 a 59 anos", + "60 anos ou mais": "Feminino_60 anos ou mais", + "Até 24 anos.1": "Masculino_Até 24 anos", + "De 25 a 29 anos.1": "Masculino_25 a 29 anos", + "De 30 a 39 anos.1": "Masculino_30 a 39 anos", + "De 40 a 49 anos.1": "Masculino_40 a 49 anos", + "De 50 a 54 anos.1": "Masculino_50 a 54 anos", + "De 55 a 59 anos.1": "Masculino_55 a 59 anos", + "60 anos ou mais.1": "Masculino_60 anos ou mais", + }, +} + + +localizacao = { + "educacao_basica": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"], + "chave": "2.3", + "valor": "Educacao Basica", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "educacao_infantil_creche": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"], + 
#"chave": "2.9", + "chave": "2.8", # para anos anteriores a 2010 + "valor": "Educacao Infantil - Creche", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "educacao_infantil_pre_escola": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"], + #"chave": "2.13", + "chave": "2.11", # Para anos anteriores a 2010 + "valor": "Educacao Infantil - Pré-Escola", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "ensino_fundamental": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"], + #"chave": "2.18", + "chave": "2.15", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "ensino_fundamental_anos_iniciais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"], + #"chave": "2.22", + "chave": "2.18", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental - Anos Iniciais", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "ensino_fundamental_anos_finais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"], + # "chave": "2.26", + "chave": "2.21", # Para anos anteriores a 2010 + "valor": "Ensino Fundamental - Anos Finais", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "ensino_medio": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"], + #"chave": "2.30", + "chave": "2.24", # Para anos anteriores a 2010 + "valor": "Ensino Médio", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "educacao_profissional": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"], + "chave": "2.35", + "chave": "2.28", # Para anos anteriores a 2010 + "valor": "Educacao Profissional", + "skiprows": 8, + "table": "docente_faixa_etaria_sexo", + }, + "EJA": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"], + #"chave": "2.40", + "chave": "2.32", # Para anos anteriores a 2010 + "valor": "EJA", + "skiprows": 8, + "table": 
def read_sheet(
    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
) -> pd.DataFrame:
    """Convert one worksheet of the yearly workbook into partitioned CSV files.

    The worksheet is read from
    ``INPUT/Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}/...{ano}.xlsx``,
    its columns are renamed with ``dicionario``, rows without a municipality
    id are discarded, and the remaining value columns are melted into one row
    per (municipality, sex, age band). Each state's rows are written to
    ``OUTPUT/{table}/ano={ano}/sigla_uf={UF}/data.csv``; when that file
    already exists the rows are appended without a header, so several sheets
    can be accumulated into the same partition.

    Args:
        table: Name of the output table (first folder level under OUTPUT).
        ano: Year of the workbook; used in the input path and partition name.
        chave: Worksheet name inside the workbook.
        valor: Label stored in the ``tipo_classe`` column for every row.
        dicionario: Mapping from raw worksheet headers to ``uf``,
            ``id_municipio`` and ``"<sexo>_<faixa etária>"`` labels.
        skiprows: Number of leading worksheet rows to skip before the header.

    Returns:
        The long-format DataFrame that was written to disk (including the
        ``sigla_uf`` column, which is dropped from the CSV files).

    Raises:
        KeyError: If a header listed in ``dicionario`` is missing from the
            worksheet (``rename(..., errors="raise")``).
    """
    print("Tratando dados de", valor, ano)

    # NOTE: other releases were processed with differently named folders,
    # e.g. f"sinopse_estatística_educaç╞o_básica_{ano}"; adjust the two path
    # components below when the extracted archive uses another naming.
    path_excel = os.path.join(
        INPUT,
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
    )

    # Read the worksheet once (the previous version parsed it twice).
    sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
    print(sheet.columns)

    # Use the mapping handed in by the caller instead of a module global, so
    # the ``dicionario`` argument is actually honoured.
    renamed = sheet.rename(columns=dicionario, errors="raise")

    # Anything still called "Unnamed: N" or "Total..." was not mapped and is
    # not part of the output.
    unused = [
        col for col in renamed.columns if str(col).startswith(("Unnamed", "Total"))
    ]
    renamed = renamed.drop(columns=unused)

    # Keep only rows that carry a municipality id (drops blank/aggregate rows).
    has_municipio = renamed["id_municipio"].notna() & (renamed["id_municipio"] != " ")
    municipios = renamed.loc[has_municipio]

    id_cols = ["id_municipio", "uf"]
    df_faixa_etaria = pd.melt(
        municipios,
        id_vars=id_cols,
        value_vars=municipios.columns.difference(id_cols).tolist(),
        var_name="faixa_etaria",
        value_name="quantidade_docente",
    ).assign(tipo_classe=valor)

    # Translate state names into their two-letter codes.
    bd_dir = bd.read_sql(
        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
        billing_project_id="basedosdados",
        reauth=False,
    )
    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))
    df_faixa_etaria["uf"] = df_faixa_etaria["uf"].str.strip().replace(nome_para_sigla)
    df_faixa_etaria = df_faixa_etaria.rename(columns={"uf": "sigla_uf"}, errors="raise")

    # Labels look like "Feminino_Até 24 anos": the part before the underscore
    # is the sex and the part after it is the age band. The previous version
    # assigned them the other way round.
    # NOTE(review): confirm no downstream query was compensating for the swap.
    partes = df_faixa_etaria["faixa_etaria"].str.split("_")
    df_faixa_etaria["sexo"] = partes.str[0]
    df_faixa_etaria["faixa_etaria"] = partes.str[-1]

    df_faixa_etaria["quantidade_docente"] = df_faixa_etaria[
        "quantidade_docente"
    ].astype(int)

    print("Particionando dados")
    for sigla_uf, particao in df_faixa_etaria.groupby("sigla_uf"):
        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
        os.makedirs(path, exist_ok=True)
        destino = os.path.join(path, "data.csv")
        # Decide on the header by looking at the file itself: a pre-existing
        # empty directory must still produce a CSV with a header row.
        novo_arquivo = not os.path.exists(destino)
        particao.drop(columns=["sigla_uf"]).to_csv(
            destino,
            index=False,
            mode="w" if novo_arquivo else "a",
            header=novo_arquivo,
        )

    return df_faixa_etaria
lista: + read_sheet( + table=localizacao[x]["table"], + ano=2007, + chave=localizacao[x]["chave"], + valor=localizacao[x]["valor"], + dicionario=localizacao[x]["dicionario"], + skiprows=localizacao[x]["skiprows"], + ) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py new file mode 100644 index 00000000..dc546fbf --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_localizacao.py @@ -0,0 +1,544 @@ +import os +import zipfile +import pandas as pd +import basedosdados as bd +import numpy as np + + +INPUT = os.path.join(os.getcwd(), "input") +OUTPUT = os.path.join(os.getcwd(), "output") + +# os.makedirs(INPUT, exist_ok=True) +# os.makedirs(OUTPUT, exist_ok=True) + +RENAMES_ETAPA_ENSINO_SERIE = { + "Educacao Basica": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Educacao Infantil": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", 
+ "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Educacao Infantil - Creche": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + #### + # Para 2014 + #### + # "Unnamed: 1": "uf", + # "Unnamed: 3": "id_municipio", + # "Pública ": "Dependência Administrativa_Pública", + # "Federal": "Dependência Administrativa_Federal", + # "Estadual": "Dependência Administrativa_Estadual", + # "Municipal": "Dependência Administrativa_Municipal", + # "Privada": "Dependência Administrativa_Privada", + # "Pública": "Urbana_Pública", + # "Federal.1": "Urbana_Federal", + # "Estadual.1": "Urbana_Estadual", + # "Municipal.1": "Urbana_Municipal", + # "Privada.1": "Urbana_Privada", + # "Pública.1": "Rural_Pública", + # "Federal.2": "Rural_Federal", + # "Estadual.2": "Rural_Estadual", + # "Municipal.2": "Rural_Municipal", + # "Privada.2": "Rural_Privada", + }, + "Educacao Infantil - Pré-Escola": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + 
"Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Ensino Fundamental": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Ensino Fundamental - Anos Iniciais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Ensino Fundamental - Anos Finais": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência 
Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Ensino Médio": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "Educacao Profissional": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, + "EJA": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência 
Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + #### + # Para 2014 + #### + # "Unnamed: 1": "uf", + # "Unnamed: 3": "id_municipio", + # "Publica ": "Dependência Administrativa_Pública", + # "Federal": "Dependência Administrativa_Federal", + # "Estadual": "Dependência Administrativa_Estadual", + # "Municipal": "Dependência Administrativa_Municipal", + # "Privada": "Dependência Administrativa_Privada", + # "Pública": "Urbana_Pública", + # "Federal.1": "Urbana_Federal", + # "Estadual.1": "Urbana_Estadual", + # "Municipal.1": "Urbana_Municipal", + # "Privada.1": "Urbana_Privada", + # "Pública.1": "Rural_Pública", + # "Federal.2": "Rural_Federal", + # "Estadual.2": "Rural_Estadual", + # "Municipal.2": "Rural_Municipal", + # "Privada.2": "Rural_Privada", + }, + "Educacao Especial - Classes Comuns": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + ##### + # Para 2014 + #### + # "Unnamed: 1": "uf", + # "Unnamed: 3": "id_municipio", + # "Pública ": "Dependência 
Administrativa_Pública", + # "Federal": "Dependência Administrativa_Federal", + # "Estadual": "Dependência Administrativa_Estadual", + # "Municipal": "Dependência Administrativa_Municipal", + # "Privada": "Dependência Administrativa_Privada", + # "Pública": "Urbana_Pública", + # "Federal.1": "Urbana_Federal", + # "Estadual.1": "Urbana_Estadual", + # "Municipal.1": "Urbana_Municipal", + # "Privada.1": "Urbana_Privada", + # "Pública.1": "Rural_Pública", + # "Federal.2": "Rural_Federal", + # "Estadual.2": "Rural_Estadual", + # "Municipal.2": "Rural_Municipal", + # "Privada.2": "Rural_Privada", + }, + "Educacao Especial - Classes Exclusivas": { + "Unnamed: 1": "uf", + "Unnamed: 3": "id_municipio", + "Pública": "Dependência Administrativa_Pública", + "Federal": "Dependência Administrativa_Federal", + "Estadual": "Dependência Administrativa_Estadual", + "Municipal": "Dependência Administrativa_Municipal", + "Privada": "Dependência Administrativa_Privada", + "Pública.1": "Urbana_Pública", + "Federal.1": "Urbana_Federal", + "Estadual.1": "Urbana_Estadual", + "Municipal.1": "Urbana_Municipal", + "Privada.1": "Urbana_Privada", + "Pública.2": "Rural_Pública", + "Federal.2": "Rural_Federal", + "Estadual.2": "Rural_Estadual", + "Municipal.2": "Rural_Municipal", + "Privada.2": "Rural_Privada", + }, +} + + +localizacao = { + "educacao_basica": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Basica"], + "chave": "2.2", + "valor": "Educacao Basica", + "skiprows": 8, + "table": "docente_localizacao", + }, + "educacao_infantil": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil"], + "chave": "2.6", # Para o ano de 2010 + #"chave": "2.7", + "valor": "Educacao Infantil", + "skiprows": 8, + "table": "docente_localizacao", + }, + "ensino_infantil_creche": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Creche"], + "chave": "Creche 2.7", # Para o ano de 2010 + #"chave": "Creche 2.8", + "valor": "Educacao Infantil - Creche", + "skiprows": 8, + 
"table": "docente_localizacao", + }, + "educacao_infantil_pre_escola": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Infantil - Pré-Escola"], + "chave": "Pré-Escola 2.10", # Para o ano de 2010 + #"chave": "Pré-Escola 2.12", + "valor": "Educacao Infantil - Pré-Escola", + "skiprows": 8, + "table": "docente_localizacao", + }, + "ensino_fundamental": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental"], + #"chave": "2.17", + "chave": "2.14", # Para o ano de 2010 + "valor": "Ensino Fundamental", + "skiprows": 8, + "table": "docente_localizacao", + }, + "ensino_fundamental_anos_iniciais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Iniciais"], + #"chave": "Anos Iniciais 2.21", + "chave": "Anos Iniciais 2.17", # Para o ano de 2010 + "valor": "Ensino Fundamental - Anos Iniciais", + "skiprows": 8, + "table": "docente_localizacao", + }, + "ensino_fundamental_anos_finais": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Fundamental - Anos Finais"], + #"chave": "Anos Finais 2.25", + "chave": "Anos Finais 2.20", # Para o ano de 2010 + "valor": "Ensino Fundamental - Anos Finais", + "skiprows": 8, + "table": "docente_localizacao", + }, + "ensino_medio": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Ensino Médio"], + #"chave": "Ensino Médio 2.29", + "chave": "Ensino Médio 2.23", # Para o ano de 2010 + "valor": "Ensino Médio", + "skiprows": 8, + "table": "docente_localizacao", + }, + "educacao_profissional": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Profissional"], + #"chave": "2.34", + "chave": "2.27", # Para o ano de 2010 + "valor": "Educacao Profissional", + "skiprows": 8, + "table": "docente_localizacao", + }, + "EJA": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["EJA"], + #"chave": "2.39", + "chave": "2.31", # Para o ano de 2010 + "valor": "EJA", + "skiprows": 8, + "table": "docente_localizacao", + }, + "educacao_especial_classes_comuns": { + "dicionario": RENAMES_ETAPA_ENSINO_SERIE["Educacao Especial - 
def read_sheet(
    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
) -> pd.DataFrame:
    """Convert one worksheet of the yearly workbook into partitioned CSV files.

    The worksheet is read from
    ``INPUT/Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}/...{ano}.xlsx``,
    its columns are renamed with ``dicionario``, rows without a municipality
    id are discarded, and the remaining value columns are melted into one row
    per (municipality, location, school network). Each state's rows are
    written to ``OUTPUT/{table}/ano={ano}/sigla_uf={UF}/data.csv``; when that
    file already exists the rows are appended without a header, so several
    sheets can be accumulated into the same partition.

    Args:
        table: Name of the output table (first folder level under OUTPUT).
        ano: Year of the workbook; used in the input path and partition name.
        chave: Worksheet name inside the workbook.
        valor: Label stored in the ``tipo_classe`` column for every row.
        dicionario: Mapping from raw worksheet headers to ``uf``,
            ``id_municipio`` and ``"<localizacao>_<rede>"`` labels.
        skiprows: Number of leading worksheet rows to skip before the header.

    Returns:
        The long-format DataFrame that was written to disk (including the
        ``sigla_uf`` column, which is dropped from the CSV files).

    Raises:
        KeyError: If a header listed in ``dicionario`` is missing from the
            worksheet (``rename(..., errors="raise")``).
    """
    print("Tratando dados de", valor, ano)
    path_excel = os.path.join(
        INPUT,
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
    )

    # Read the worksheet once (the previous version parsed it twice).
    sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
    print(sheet.columns)

    renamed = sheet.rename(columns=dicionario, errors="raise")

    # Anything still called "Unnamed: N" or "Total..." was not mapped and is
    # not part of the output.
    unused = [
        col for col in renamed.columns if str(col).startswith(("Unnamed", "Total"))
    ]
    renamed = renamed.drop(columns=unused)

    # Keep only rows that carry a municipality id (drops blank/aggregate rows).
    has_municipio = renamed["id_municipio"].notna() & (renamed["id_municipio"] != " ")
    municipios = renamed.loc[has_municipio]

    id_cols = ["id_municipio", "uf"]
    df_localizacao = pd.melt(
        municipios,
        id_vars=id_cols,
        value_vars=municipios.columns.difference(id_cols).tolist(),
        var_name="localizacao",
        value_name="quantidade_docente",
    ).assign(tipo_classe=valor)

    # Translate state names into their two-letter codes.
    bd_dir = bd.read_sql(
        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
        billing_project_id="basedosdados",
        reauth=False,
    )
    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))
    df_localizacao["uf"] = df_localizacao["uf"].str.strip().replace(nome_para_sigla)
    df_localizacao = df_localizacao.rename(columns={"uf": "sigla_uf"}, errors="raise")

    # Labels look like "Urbana_Pública": location first, school network last.
    partes = df_localizacao["localizacao"].str.split("_")
    df_localizacao["rede"] = partes.str[-1]
    df_localizacao["localizacao"] = partes.str[0]

    df_localizacao["quantidade_docente"] = df_localizacao["quantidade_docente"].astype(
        int
    )

    # Fix the column order of the output files.
    df_localizacao = df_localizacao[
        [
            "sigla_uf",
            "id_municipio",
            "tipo_classe",
            "rede",
            "localizacao",
            "quantidade_docente",
        ]
    ]

    print("Particionando dados")
    for sigla_uf, particao in df_localizacao.groupby("sigla_uf"):
        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
        os.makedirs(path, exist_ok=True)
        destino = os.path.join(path, "data.csv")
        # Decide on the header by looking at the file itself: a pre-existing
        # empty directory must still produce a CSV with a header row.
        novo_arquivo = not os.path.exists(destino)
        particao.drop(columns=["sigla_uf"]).to_csv(
            destino,
            index=False,
            mode="w" if novo_arquivo else "a",
            header=novo_arquivo,
        )

    return df_localizacao
dicionario=localizacao[x]["dicionario"], + skiprows=localizacao[x]["skiprows"], + ) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py new file mode 100644 index 00000000..3e8e50a4 --- /dev/null +++ b/models/br_inep_sinopse_estatistica_educacao_basica/code_docente/docente_regime_contratacao.py @@ -0,0 +1,586 @@ +import os +import zipfile +import pandas as pd +import basedosdados as bd +import numpy as np + + +INPUT = os.path.join(os.getcwd(), "input") +OUTPUT = os.path.join(os.getcwd(), "output") + +# os.makedirs(INPUT, exist_ok=True) +# os.makedirs(OUTPUT, exist_ok=True) + +##### +# Para anos anteriores a 2011 +##### + +# RENAMES_CONTRATO = { +# "Educacao Basica": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Educacao Infantil - Creche": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": 
"Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Educacao Infantil - Pré-Escola": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Ensino Fundamental": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Ensino Fundamental - Anos Iniciais": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# 
"Ensino Fundamental - Anos Finais": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Ensino Médio": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "Educacao Profissional": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", +# "Estadual": "Concursado_Estadual", +# "Municipal": "Concursado_Municipal", +# "Federal.1": "Contrato Temporário_Federal", +# "Estadual.1": "Contrato Temporário_Estadual", +# "Municipal.1": "Contrato Temporário_Municipal", +# "Federal.2": "Contrato Terceirizado_Federal", +# "Estadual.2": "Contrato Terceirizado_Estadual", +# "Municipal.2": "Contrato Terceirizado_Municipal", +# "Federal.3": "Contrato CLT_Federal", +# "Estadual.3": "Contrato CLT_Estadual", +# "Municipal.3": "Contrato CLT_Municipal", +# }, +# "EJA": { +# "Unnamed: 1": "uf", +# "Unnamed: 3": "id_municipio", +# "Federal": "Concursado_Federal", 
# Header -> "<contract regime>_<network>" mapping shared by every contract-regime
# sheet. pandas de-duplicates the repeated Federal/Estadual/Municipal headers
# with ".1", ".2", ... suffixes, one suffix per contract-regime column group.
# NOTE(review): a commented-out earlier variant of this mapping also listed a
# fourth group ("Federal.3"/"Estadual.3"/"Municipal.3" -> "Contrato CLT_*");
# that group is not covered here.
_COLUNAS_CONTRATO = {
    "Unnamed: 1": "uf",
    "Unnamed: 3": "id_municipio",
    "Federal": "Concursado_Federal",
    "Estadual": "Concursado_Estadual",
    "Municipal": "Concursado_Municipal",
    "Federal.1": "Contrato Temporário_Federal",
    "Estadual.1": "Contrato Temporário_Estadual",
    "Municipal.1": "Contrato Temporário_Municipal",
    "Federal.2": "Contrato Terceirizado_Federal",
    "Estadual.2": "Contrato Terceirizado_Estadual",
    "Municipal.2": "Contrato Terceirizado_Municipal",
}

# Sheet labels that share the layout above, in their original order.
_ETAPAS_CONTRATO = (
    "Educacao Basica",
    "Educacao Infantil - Creche",
    "Educacao Infantil - Pré-Escola",
    "Ensino Fundamental",
    "Ensino Fundamental - Anos Iniciais",
    "Ensino Fundamental - Anos Finais",
    "Ensino Médio",
    "Educacao Profissional",
    "EJA",
    "Educacao Especial - Classes Comuns",
    "Educacao Especial - Classes Exclusivas",
)

# For years before 2011 (per the original author's note).
# Every label gets its own copy so that mutating one mapping never leaks
# into the others.
RENAMES_CONTRATO = {etapa: dict(_COLUNAS_CONTRATO) for etapa in _ETAPAS_CONTRATO}
# (config key, workbook sheet name, label used both as the RENAMES_CONTRATO key
# and as the "valor" passed to read_sheet)
# NOTE(review): the original inline comments disagree about which year each
# sheet number belongs to; they are reproduced below as written and should be
# confirmed against the workbooks.
_ABAS_REGIME_CONTRATO = (
    ("educacao_basica", "2.5", "Educacao Basica"),
    ("ensino_infantil_creche", "2.11", "Educacao Infantil - Creche"),
    ("educacao_infantil_pre_escola", "2.15", "Educacao Infantil - Pré-Escola"),
    ("ensino_fundamental", "2.20", "Ensino Fundamental"),
    ("ensino_fundamental_anos_iniciais", "2.24", "Ensino Fundamental - Anos Iniciais"),
    ("ensino_fundamental_anos_finais", "2.28", "Ensino Fundamental - Anos Finais"),
    ("ensino_medio", "2.32", "Ensino Médio"),
    ("educacao_profissional", "2.37", "Educacao Profissional"),
    ("EJA", "2.42", "EJA"),
    # "2.48" was annotated "for the year 2010"; the disabled alternative was "2.49".
    ("educacao_especial_classes_comuns", "2.48", "Educacao Especial - Classes Comuns"),
    # "2.53" was annotated "for the year 2021"; the disabled alternative was "2.55".
    ("educacao_especial_classes_exclusivas", "2.53", "Educacao Especial - Classes Exclusivas"),
)

# Per-sheet arguments for read_sheet.
# "skiprows" is 9 for every sheet (annotated "for the year 2021" in the
# original); the disabled alternatives were 8 for all sheets and, for the two
# "Educacao Especial" sheets only, 10 ("for the year 2011").
regime_contrato = {
    nome: {
        "dicionario": RENAMES_CONTRATO[rotulo],
        "chave": aba,
        "valor": rotulo,
        "skiprows": 9,
        "table": "docente_regime_contrato",
    }
    for nome, aba, rotulo in _ABAS_REGIME_CONTRATO
}
def read_sheet(
    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows
) -> pd.DataFrame:
    """Reshape one contract-regime sheet to long format and write it partitioned by UF.

    Args:
        table: Name of the output table; used as the first folder under OUTPUT.
        ano: Year of the workbook; used in the input path and the ``ano=`` partition.
        chave: Sheet name inside the workbook.
        valor: Label stored in the ``tipo_classe`` column of every output row.
        dicionario: Header -> new-name mapping. It must rename the state and
            municipality columns to ``uf`` and ``id_municipio`` and the value
            columns to ``"<regime>_<rede>"``.
        skiprows: Number of leading rows skipped when reading the sheet.

    Returns:
        The long-format frame with columns ``id_municipio``, ``sigla_uf``,
        ``regime_contrato``, ``quantidade_docente``, ``tipo_classe`` and ``rede``.

    Raises:
        KeyError: if a key of ``dicionario`` is not a column of the sheet
            (``rename(..., errors="raise")``).

    Side effects:
        Writes ``OUTPUT/<table>/ano=<ano>/sigla_uf=<UF>/data.csv``. Rows are
        appended when the file already exists, so running the script again
        without clearing OUTPUT duplicates rows.
    """
    print("Tratando dados de", valor, ano)
    # NOTE(review): "ç╞o" looks like an encoding artifact, but it is kept as-is
    # because it has to match the folder/file name on disk — confirm.
    path_excel = os.path.join(
        INPUT,
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}",
        f"Sinopse_Estatistica_da_Educaç╞o_Basica_{ano}.xlsx",
    )

    # The sheet is parsed a single time.
    df_sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
    print(df_sheet.columns)

    # Rename first so the id columns ("Unnamed: 1"/"Unnamed: 3") survive the
    # clean-up of the remaining "Unnamed*" and "Total*" columns.
    df_sheet = df_sheet.rename(columns=dicionario, errors="raise")
    cols_drop = [
        col for col in df_sheet.columns if col.startswith(("Unnamed", "Total"))
    ]
    df_sheet = df_sheet.drop(columns=cols_drop)

    # Keep only rows that carry a municipality id (drops blank/aggregate rows).
    has_municipio = df_sheet["id_municipio"].notna() & (
        df_sheet["id_municipio"] != " "
    )
    id_vars = ["id_municipio", "uf"]
    df_regime_contrato = pd.melt(
        df_sheet.loc[has_municipio],
        id_vars=id_vars,
        value_vars=df_sheet.columns.difference(id_vars).tolist(),
        var_name="regime_contrato",
        value_name="quantidade_docente",
    ).assign(tipo_classe=valor)

    bd_dir = bd.read_sql(
        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
        billing_project_id="basedosdados",
        reauth=False,
    )
    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))

    # str.strip raises on non-string values, so a malformed state cell fails
    # loudly instead of being silently dropped by the groupby below.
    df_regime_contrato["uf"] = (
        df_regime_contrato["uf"].apply(str.strip).replace(nome_para_sigla)
    )
    df_regime_contrato = df_regime_contrato.rename(
        columns={"uf": "sigla_uf"}, errors="raise"
    )

    # Melted labels look like "<regime>_<rede>"; split them into two columns.
    partes = df_regime_contrato["regime_contrato"].str.split("_")
    df_regime_contrato["rede"] = partes.str[-1]
    df_regime_contrato["regime_contrato"] = partes.str[0]

    df_regime_contrato["quantidade_docente"] = df_regime_contrato[
        "quantidade_docente"
    ].astype(int)

    print("Particionando dados")
    for sigla_uf, df_uf in df_regime_contrato.groupby("sigla_uf"):
        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
        os.makedirs(path, exist_ok=True)
        csv_path = os.path.join(path, "data.csv")
        # Decide on the file, not the directory: an existing but empty
        # partition folder must still get a header row.
        append = os.path.exists(csv_path)
        df_uf.drop(columns=["sigla_uf"]).to_csv(
            csv_path,
            index=False,
            mode="a" if append else "w",
            header=not append,
        )

    return df_regime_contrato
if __name__ == "__main__":
    # Sheets to process, as keys of `regime_contrato`.
    lista = [
        "educacao_basica",
        "ensino_infantil_creche",
        "educacao_infantil_pre_escola",
        "ensino_fundamental",
        "ensino_fundamental_anos_iniciais",
        "ensino_fundamental_anos_finais",
        "ensino_medio",
        "educacao_profissional",
        "EJA",
        "educacao_especial_classes_comuns",
        "educacao_especial_classes_exclusivas",
    ]

    # The year is fixed at 2010; a disabled loop over range(2012, 2019) was
    # left in the original as an alternative.
    for x in lista:
        config = regime_contrato[x]
        read_sheet(
            table=config["table"],
            ano=2010,
            chave=config["chave"],
            valor=config["valor"],
            dicionario=config["dicionario"],
            skiprows=config["skiprows"],
        )
# Administrative networks, in the order the columns repeat inside each group.
_REDES = ("Pública", "Federal", "Estadual", "Municipal", "Privada")


def _colunas_por_rede(grupos):
    """Build the rename mapping for sheets whose headers repeat per network.

    pandas suffixes repeated headers with ".1", ".2", ...; group ``i`` therefore
    owns the headers ``"<rede>"`` (i == 0) or ``"<rede>.<i>"`` and is renamed to
    ``"<grupo> - <rede>"``.
    """
    colunas = {"Unnamed: 1": "uf", "Unnamed: 3": "id_municipio"}
    for posicao, grupo in enumerate(grupos):
        sufixo = f".{posicao}" if posicao else ""
        for rede in _REDES:
            colunas[f"{rede}{sufixo}"] = f"{grupo} - {rede}"
    return colunas


def _colunas_por_etapa(
    chave_normal="Ensino Médio Normal/Magistério",
    chave_integrado="Curso Técnico Integrado (Ensino Médio Integrado)",
    com_total12=False,
    com_classes=True,
):
    """Build the rename mapping for sheets whose columns are teaching stages.

    The sheets differ only in the spelling of two headers, in whether
    "Total12" is mapped to an empty name, and in whether the two
    "Classes ..." columns exist; key order follows the original literals.
    """
    colunas = {
        "Unnamed: 1": "uf",
        "Unnamed: 3": "id_municipio",
        "Creche": "Educação Infantil - Creche",
        "Pré-Escola11": "Educação Infantil - Pré Escola",
    }
    if com_total12:
        # Renamed to an empty string rather than left as "Total12".
        colunas["Total12"] = ""
    colunas.update(
        {
            "Anos Iniciais13": "Ensino Fundamental - Anos Iniciais",
            "Anos Finais14": "Ensino Fundamental - Anos Finais",
            "Ensino Médio Propedêutico": "Ensino Médio - Propedêutico",
            chave_normal: "Ensino Médio - Normal/Magistério",
            chave_integrado: "Ensino Médio - Curso Técnico Integrado",
            "Associada ao Ensino Médio18": "Educação Profissional Técnica de Nível Médio - Associada ao Ensino Médio",
            "Curso Técnico Concomitante": "Educação Profissional Técnica de Nível Médio - Curso Técnico Concomitante",
            "Curso Técnico Subsequente": "Educação Profissional Técnica de Nível Médio - Curso Técnico Subsequente",
            "Curso Técnico Misto (Concomitante e Subsequente)": "Educação Profissional Técnica de Nível Médio - Curso Técnico Misto (Concomitante e Subsequente)",
            "Curso FIC Concomitante": "Educação Profissional - Curso FIC Concomitante",
            "Curso FIC Integrado na Modalidade EJA20": "Educação Profissional - Curso FIC Integrado na Modalidade EJA",
            "Ensino Fundamental22": "EJA - Ensino Fundamental",
            "Ensino Médio23": "EJA - Ensino Médio",
        }
    )
    if com_classes:
        # The original notes that for values "before 2016 and 2013" these two
        # headers are plain "Classes Comuns" / "Classes Exclusivas" (no numeric
        # suffix) — TODO confirm which years that applies to.
        colunas["Classes Comuns25"] = "Educação Especial - Classes Comuns"
        colunas["Classes Exclusivas26"] = "Educação Especial - Classes Exclusivas"
    return colunas


# Column groups of the "Educacao Profissional" sheet, left to right.
# A disabled alternative in the original had one extra group,
# "EJA Ensino Fundamental Projovem Urbano", between
# "Curso Técnico Integrado a EJA" and "Curso FIC Concomitante" (ten groups).
# NOTE(review): the original comment on which years use which layout
# ("antes depois de 2018") is ambiguous — confirm against the workbooks.
_GRUPOS_EDUCACAO_PROFISSIONAL = (
    "Curso Técnico Integrado (Ensino Médio Integrado)",
    "Ensino Médio Normal/Magistério",
    "Curso Técnico Concomitante",
    "Curso Técnico Subsequente",
    "Curso Técnico Misto (Concomitante e Subsequente)",
    "Curso Técnico Integrado a EJA",
    "Curso FIC Concomitante",
    "Curso FIC Integrado na Modalidade EJA de Nível Fundamental",
    "Curso FIC Integrado na Modalidade EJA de Nível Médio",
)

# Sheet label -> {header as read by pandas: output column name}.
RENAMES_ETAPA_ENSINO_SERIE = {
    "Educacao Basica": _colunas_por_etapa(com_total12=True),
    "Educacao Infantil": _colunas_por_rede(("Creche", "Pré-Escola")),
    "Ensino Fundamental": _colunas_por_rede(
        ("Anos Iniciais", "Anos Finais", "Turmas Multi")
    ),
    "Educacao Profissional": _colunas_por_rede(_GRUPOS_EDUCACAO_PROFISSIONAL),
    "EJA": _colunas_por_rede(("Ensino Fundamental", "Ensino Médio")),
    "Educacao Especial": _colunas_por_etapa(),
    "Educacao Especial - Classes Comuns": _colunas_por_etapa(
        chave_normal="Ensino Médio Normal/ Magistério",
        chave_integrado="Ensino Médio Curso Técnico Integrado (Ensino Médio Integrado)",
        com_classes=False,
    ),
    "Educacao Especial - Classes Exclusivas": _colunas_por_etapa(
        chave_normal="Ensino Médio Normal/ Magistério",
        com_classes=False,
    ),
    "Educacao Indigena": _colunas_por_etapa(),
}
# (config key, key in RENAMES_ETAPA_ENSINO_SERIE, workbook sheet name,
#  label stored as "valor", rows to skip)
# Trailing comments record the sheet names the original author noted for
# other years.
_ABAS_ETAPA_ENSINO = (
    ("educacao_basica", "Educacao Basica", "Educação Básica 2.1", "Educacao Basica", 8),
    # 2010: "Educação Infantil 2.5"
    ("educacao_infantil", "Educacao Infantil", "Educação Infantil 2.6", "Educacao Infantil", 8),
    # 2010: "Ensino Fundamental 2.13"
    ("ensino_fundamental", "Ensino Fundamental", "Ensino Fundamental 2.16", "Ensino Fundamental", 8),
    # 2010: "Educação Profissional 2.26"
    # NOTE(review): the label "Ensino Profissional" differs from the mapping key
    # "Educacao Profissional"; kept byte-for-byte because read_sheet passes it on
    # as the tipo_classe value — confirm whether it is intentional.
    ("educacao_profissional", "Educacao Profissional", "Educação Profissional 2.33", "Ensino Profissional", 9),
    # 2010: "EJA 2.30"
    ("EJA", "EJA", "EJA 2.38", "EJA", 8),
    # 2010: "Educação Especial 2.34"
    ("educacao_especial", "Educacao Especial", "Educação Especial 2.43", "Educacao Especial", 8),
    # 2010: "Classes Comuns 2.35"
    (
        "educacao_especial_classes_comuns",
        "Educacao Especial - Classes Comuns",
        "Classes Comuns 2.44",
        "Educacao Especial - Classes Comuns",
        8,
    ),
    # 2011: "Classes Exclusivas 2.49"; 2010: "Classes Exclusivas 2.39"
    (
        "educacao_especial_classes_exclusivas",
        "Educacao Especial - Classes Exclusivas",
        "Classes Exclusivas 2.50",
        "Educacao Especial - Classes Exclusivas",
        8,
    ),
    ("educacao_indigena", "Educacao Indigena", "Educação Indígena 2.56", "Educacao Indigena", 9),
)

# Per-sheet arguments for read_sheet.
etapa_ensino = {
    nome: {
        "dicionario": RENAMES_ETAPA_ENSINO_SERIE[chave_dicionario],
        "chave": aba,
        "valor": rotulo,
        "skiprows": linhas,
        "table": "docente_etapa_ensino",
    }
    for nome, chave_dicionario, aba, rotulo, linhas in _ABAS_ETAPA_ENSINO
}
def read_sheet(
    table: str, ano: int, chave: str, valor: str, dicionario: dict, skiprows: int = 9
) -> pd.DataFrame:
    """Reshape one teaching-stage sheet to long format and write it partitioned by UF.

    Args:
        table: Name of the output table; used as the first folder under OUTPUT.
        ano: Year of the workbook; used in the input path and the ``ano=`` partition.
        chave: Sheet name inside the workbook.
        valor: Label stored in the ``tipo_classe`` column of every output row.
        dicionario: Header -> new-name mapping. It must rename the state and
            municipality columns to ``uf`` and ``id_municipio``. A header
            mapped to ``""`` is discarded after reshaping.
        skiprows: Number of leading rows skipped when reading the sheet.

    Returns:
        The long-format frame with columns ``id_municipio``, ``sigla_uf``,
        ``etapa_ensino``, ``quantidade_docentes`` and ``tipo_classe``.

    Raises:
        KeyError: if a key of ``dicionario`` is not a column of the sheet
            (``rename(..., errors="raise")``).

    Side effects:
        Writes ``OUTPUT/<table>/ano=<ano>/sigla_uf=<UF>/data.csv``. Rows are
        appended when the file already exists, so running the script again
        without clearing OUTPUT duplicates rows.
    """
    print("Tratando dados de", valor)
    path_excel = os.path.join(
        INPUT,
        f"Sinopse_Estatistica_da_Educacao_Basica_{ano}",
        f"Sinopse_Estatistica_da_Educacao_Basica_{ano}.xlsx",
    )

    # The sheet is parsed a single time.
    df_sheet = pd.read_excel(path_excel, skiprows=skiprows, sheet_name=chave)
    print(df_sheet.columns)

    # Rename first so the id columns ("Unnamed: 1"/"Unnamed: 3") survive the
    # clean-up of the remaining "Unnamed*" and "Total*" columns.
    df_sheet = df_sheet.rename(columns=dicionario, errors="raise")
    cols_drop = [
        col for col in df_sheet.columns if col.startswith(("Unnamed", "Total"))
    ]
    df_sheet = df_sheet.drop(columns=cols_drop)

    # Keep only rows that carry a municipality id (drops blank/aggregate rows).
    has_municipio = df_sheet["id_municipio"].notna() & (
        df_sheet["id_municipio"] != " "
    )
    id_vars = ["id_municipio", "uf"]
    df_etapa_ensino = pd.melt(
        df_sheet.loc[has_municipio],
        id_vars=id_vars,
        value_vars=df_sheet.columns.difference(id_vars).tolist(),
        var_name="etapa_ensino",
        value_name="quantidade_docentes",
    ).assign(tipo_classe=valor)

    # Columns renamed to "" (placeholders) become NaN here and are removed
    # before the integer cast, so only real stage rows are converted.
    df_etapa_ensino["etapa_ensino"] = (
        df_etapa_ensino["etapa_ensino"].str.strip().replace("", np.nan)
    )
    # dropna returns a new frame, so the assignments below never write into a
    # slice of the melted frame.
    df_etapa_ensino = df_etapa_ensino.dropna(subset=["etapa_ensino"])
    df_etapa_ensino["quantidade_docentes"] = df_etapa_ensino[
        "quantidade_docentes"
    ].astype(int)

    bd_dir = bd.read_sql(
        "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`",
        billing_project_id="basedosdados",
        reauth=False,
    )
    nome_para_sigla = dict(zip(bd_dir["nome"], bd_dir["sigla"]))

    # str.strip raises on non-string values, so a malformed state cell fails
    # loudly instead of being silently dropped by the groupby below.
    df_etapa_ensino["uf"] = (
        df_etapa_ensino["uf"].apply(str.strip).replace(nome_para_sigla)
    )
    df_etapa_ensino = df_etapa_ensino.rename(columns={"uf": "sigla_uf"}, errors="raise")

    for sigla_uf, df_uf in df_etapa_ensino.groupby("sigla_uf"):
        path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}")
        os.makedirs(path, exist_ok=True)
        csv_path = os.path.join(path, "data.csv")
        # Decide on the file, not the directory: an existing but empty
        # partition folder must still get a header row.
        append = os.path.exists(csv_path)
        df_uf.drop(columns=["sigla_uf"]).to_csv(
            csv_path,
            index=False,
            mode="a" if append else "w",
            header=not append,
        )

    return df_etapa_ensino
drop_unused_columns(df.rename(columns=dicionario, errors="raise")) + for name, df in dfs_etapa_ensino_serie.items() + } + + df_etapa_ensino = pd.concat( + [ + df.pipe( + lambda d: d.loc[ + (d["id_municipio"].notna()) & (d["id_municipio"] != " "), + ] + ) + .pipe( + lambda d: pd.melt( + d, + id_vars=["id_municipio", "uf"], + value_vars=d.columns.difference( + ["id_municipio", "uf"] + ).tolist(), # Convert to list + var_name="etapa_ensino", + value_name="quantidade_docentes", + ) + ) + .assign(tipo_classe=tipo_classe) + for tipo_classe, df in dfs_etapa_ensino_serie.items() + ] + ) + + df_etapa_ensino["etapa_ensino"] = ( + df_etapa_ensino["etapa_ensino"].str.strip().replace("", np.nan).dropna() + ) + + bd_dir = bd.read_sql( + "SELECT nome, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`", + billing_project_id="basedosdados", + reauth=False, + ) + + df_etapa_ensino["etapa_ensino"] = ( + df_etapa_ensino["etapa_ensino"].str.strip().replace("", np.nan) + ) + + df_etapa_ensino['quantidade_docentes'] = df_etapa_ensino['quantidade_docentes'].astype(int) + + df_etapa_ensino = df_etapa_ensino[pd.notna(df_etapa_ensino["etapa_ensino"])] + df_etapa_ensino["uf"] = ( + df_etapa_ensino["uf"] + .apply(lambda uf: uf.strip()) + .replace({i["nome"]: i["sigla"] for i in bd_dir.to_dict("records")}) # type: ignore + ) + df_etapa_ensino = df_etapa_ensino.rename(columns={"uf": "sigla_uf"}, errors="raise") + for sigla_uf, df in df_etapa_ensino.groupby("sigla_uf"): + path = os.path.join(OUTPUT, f"{table}", f"ano={ano}", f"sigla_uf={sigla_uf}") + if not os.path.exists(path): + os.makedirs(path, exist_ok=True) + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="w" + ) + else: + df.drop(columns=["sigla_uf"]).to_csv( + os.path.join(path, "data.csv"), index=False, mode="a", header=False + ) + + return df_etapa_ensino + +if __name__ == "__main__": + lista = [ + "educacao_basica", + "educacao_infantil", + "ensino_fundamental", + "educacao_profissional", + 
"EJA", + "educacao_especial", + "educacao_especial_classes_comuns", + "educacao_especial_classes_exclusivas", + #"educacao_indigena", + ] + + for x in lista: + read_sheet( + table=etapa_ensino[x]["table"], + ano=2021, + chave=etapa_ensino[x]["chave"], + valor=etapa_ensino[x]["valor"], + dicionario=etapa_ensino[x]["dicionario"], + skiprows=etapa_ensino[x]["skiprows"], + ) diff --git a/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml b/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml index 253a0460..93301eac 100644 --- a/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml +++ b/models/br_inep_sinopse_estatistica_educacao_basica/schema.yml @@ -165,12 +165,18 @@ models: description: Raça/Cor - name: quantidade_matricula description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_etapa_ensino - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe (Comuns ou Exclusivas) e Etapa de Ensino + - name: br_inep_sinopse_estatistica_educacao_basica__docente_escolaridade + description: A base conta com o total de docentes por município, escolaridade + e rede tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - escolaridade + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -191,19 +197,25 @@ models: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) - - name: etapa_ensino - description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos - Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) 
- - name: quantidade_matricula - description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_faixa_etaria - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe (Comuns ou Exclusivas) e faixa etária + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' + - name: escolaridade + description: Escolaridade + - name: quantidade_docente + description: Número de Docentes + - name: br_inep_sinopse_estatistica_educacao_basica__docente_regime_contrato + description: A base conta com o total de docentes por município, regime de contrato + e rede tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - regime_contrato + - rede + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -224,18 +236,27 @@ models: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) - - name: faixa_etaria - description: Faixa etária - - name: quantidade_matricula - description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_localizacao - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe, rede e localização + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' + - name: rede + description: Rede Escolar (e.g. 
Federal, Estadual, Municipal e Privada) + - name: regime_contrato + description: Regime de contratação + - name: quantidade_docente + description: Número de Docentes + - name: br_inep_sinopse_estatistica_educacao_basica__docente_faixa_etaria_sexo + description: A base conta com o total de docentes por município, faixa etária + e sexo tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - faixa_etaria + - sexo + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -256,20 +277,25 @@ models: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) - - name: rede - description: Rede Escolar (e.g. Federal, Estadual, Municipal e Privada) - - name: localizacao - description: Localização (e.g. Zona Urbana, Zona Rural) - - name: quantidade_matricula - description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_sexo_raca_cor - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe, sexo e raça/cor + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' + - name: faixa_etaria + description: Faixa Etária dos docentes + - name: sexo + description: Sexo dos docentes + - name: quantidade_docente + description: Número de Docentes + - name: br_inep_sinopse_estatistica_educacao_basica__docente_deficiencia + description: A base conta com o total de docentes por município, tipo de deficiência tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - deficiencia + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -290,20 +316,25 @@ models: to: 
ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) - - name: sexo - description: Sexo - - name: raca_cor - description: Raça/Cor - - name: quantidade_matricula - description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tempo_ensino - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe, rede e tempo de ensino (Integral ou Parcial) + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' + - name: deficiencia + description: Tipo de deficiência, transtorno global do desenvolvimento ou + altas habilidades/superdotação + - name: quantidade_docente + description: Número de docentes + - name: br_inep_sinopse_estatistica_educacao_basica__docente_localizacao + description: A base conta com o total de docentes por município, rede e localização tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - rede + - localizacao + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -324,21 +355,26 @@ models: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' - name: rede description: Rede Escolar (e.g. 
Federal, Estadual, Municipal e Privada) - - name: tempo_ensino - description: Classificação em tempo integral e tempo parcial - - name: quantidade_matricula - description: Número de matrículas - - name: br_inep_sinopse_estatistica_educacao_basica__educacao_especial_tipo_deficiencia - description: Número de alunos matriculados na Educação Especial ao longo do tempo - por tipo de classe e tipo de deficiência, transtorno global do desenvolvimento - ou altas habilidades/superdotação + - name: localizacao + description: Localização (e.g. Zona Urbana, Zona Rural) + - name: quantidade_docente + description: Número de docentes + - name: br_inep_sinopse_estatistica_educacao_basica__docente_etapa_ensino + description: A base conta com o total de docentes por município, faixa etária + e sexo tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - ano + - id_municipio + - etapa_ensino + - tipo_classe - not_null_proportion_multiple_columns: - at_least: 0.05 + at_least: 0.95 columns: - name: ano description: Ano @@ -359,10 +395,10 @@ models: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - name: tipo_classe - description: Tipo de classe da Educação Especial (e.g. Classes Comuns ou Classes - Exclusivas/Especiais) - - name: tipo_deficiêcia - description: Tipo de deficiência, transtorno global do desenvolvimento ou - altas habilidades/superdotação - - name: quantidade_matricula - description: Número de matrículas + description: 'Tipo de classe da Educação Especial (e.g.: Classes Comuns ou + Classes Exclusivas/Especiais)' + - name: etapa_ensino + description: Etapa e/ou Sub-etapas de ensino (e.g. Creche, Pré-escola, Anos + Iniciais, Anos Finais, Ensino Fundamental - EJA, ...) 
+ - name: quantidade_docente + description: Número de Docentes diff --git a/models/br_me_rais/br_me_rais__dicionario.sql b/models/br_me_rais/br_me_rais__dicionario.sql index 8c55fa33..99d08533 100644 --- a/models/br_me_rais/br_me_rais__dicionario.sql +++ b/models/br_me_rais/br_me_rais__dicionario.sql @@ -1,5 +1,4 @@ {{ config(alias="dicionario", schema="br_me_rais") }} --- Dicionário da Rais select safe_cast(id_tabela as string) id_tabela, safe_cast(nome_coluna as string) nome_coluna, diff --git a/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql b/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql index 8a0fedc7..09db0eee 100644 --- a/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql +++ b/models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql @@ -2,7 +2,7 @@ config( alias="microdados_estabelecimentos", schema="br_me_rais", - materialized="table", + materialized="incremental", partition_by={ "field": "ano", "data_type": "int64", @@ -11,10 +11,11 @@ cluster_by=["sigla_uf"], ) }} + select safe_cast(ano as int64) ano, safe_cast(sigla_uf as string) sigla_uf, - safe_cast(id_municipio as string) id_municipio, + safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio, safe_cast(quantidade_vinculos_ativos as int64) quantidade_vinculos_ativos, safe_cast(quantidade_vinculos_clt as int64) quantidade_vinculos_clt, safe_cast( @@ -23,7 +24,7 @@ select safe_cast(natureza as string) natureza_estabelecimento, safe_cast(natureza_juridica as string) natureza_juridica, safe_cast(tamanho as string) tamanho_estabelecimento, - safe_cast(tipo as string) tipo_estabelecimento, + safe_cast(regexp_replace(tipo, r'^0+', '') as string) as tipo_estabelecimento, safe_cast(indicador_cei_vinculado as int64) indicador_cei_vinculado, safe_cast(indicador_pat as int64) indicador_pat, safe_cast(indicador_simples as string) indicador_simples, @@ -32,22 +33,28 @@ select safe_cast(cnae_1 as string) cnae_1, safe_cast(cnae_2 as string) cnae_2, 
safe_cast(cnae_2_subclasse as string) cnae_2_subclasse, - cast( - cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as string - ) as subsetor_ibge, + safe_cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as subsetor_ibge, safe_cast(subatividade_ibge as string) subatividade_ibge, case - when length(cep) = 7 then lpad(cep, 8, '0') else cast(cep as string) + when length(cep) = 7 then lpad(cep, 8, '0') else safe_cast(cep as string) end as cep, case when bairros_sp = '????????????' then null - else cast(regexp_replace(bairros_sp, r'^0+', '') as string) + else trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string)) end as bairros_sp, - cast(regexp_replace(distritos_sp, r'^0+', '') as string) as distritos_sp, - cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) as bairros_fortaleza, - nullif(cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') as bairros_rj, - cast( - regexp_replace(regioes_administrativas_df, r'^0+', '') as string + trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp, + trim( + safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) + ) as bairros_fortaleza, + trim( + nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') + ) as bairros_rj, + trim( + safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string) ) as regioes_administrativas_df from `basedosdados-staging.br_me_rais_staging.microdados_estabelecimentos` as t +{% if is_incremental() %} + where + safe_cast(ano as int64) > (select safe_cast(max(ano) as int64) from {{ this }}) +{% endif %} diff --git a/models/br_me_rais/br_me_rais__microdados_vinculos.sql b/models/br_me_rais/br_me_rais__microdados_vinculos.sql index 8b2a475b..fc11a07c 100644 --- a/models/br_me_rais/br_me_rais__microdados_vinculos.sql +++ b/models/br_me_rais/br_me_rais__microdados_vinculos.sql @@ -2,7 +2,7 @@ config( alias="microdados_vinculos", schema="br_me_rais", - materialized="table", + 
materialized="incremental", partition_by={ "field": "ano", "data_type": "int64", @@ -15,7 +15,7 @@ select safe_cast(ano as int64) ano, safe_cast(sigla_uf as string) sigla_uf, - safe_cast(id_municipio as string) id_municipio, + safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio, safe_cast(tipo_vinculo as string) tipo_vinculo, safe_cast(vinculo_ativo_3112 as string) vinculo_ativo_3112, safe_cast(tipo_admissao as string) tipo_admissao, @@ -27,7 +27,7 @@ select safe_cast(causa_desligamento_3 as string) causa_desligamento_3, safe_cast(faixa_tempo_emprego as string) faixa_tempo_emprego, safe_cast(faixa_horas_contratadas as string) faixa_horas_contratadas, - safe_cast(tempo_emprego as float64) tempo_emprego, + round(safe_cast(tempo_emprego as float64), 2) tempo_emprego, safe_cast(quantidade_horas_contratadas as int64) quantidade_horas_contratadas, safe_cast(id_municipio_trabalho as string) id_municipio_trabalho, safe_cast(quantidade_dias_afastamento as int64) quantidade_dias_afastamento, @@ -37,10 +37,14 @@ select indicador_trabalho_intermitente as string ) indicador_trabalho_intermitente, safe_cast(faixa_remuneracao_media_sm as string) faixa_remuneracao_media_sm, - safe_cast(valor_remuneracao_media_sm as float64) valor_remuneracao_media_sm, + round( + safe_cast(valor_remuneracao_media_sm as float64), 2 + ) valor_remuneracao_media_sm, safe_cast(valor_remuneracao_media as float64) valor_remuneracao_media, safe_cast(faixa_remuneracao_dezembro_sm as string) faixa_remuneracao_dezembro_sm, - safe_cast(valor_remuneracao_dezembro_sm as float64) valor_remuneracao_dezembro_sm, + round( + safe_cast(valor_remuneracao_dezembro_sm as float64), 2 + ) valor_remuneracao_dezembro_sm, safe_cast(valor_remuneracao_janeiro as float64) valor_remuneracao_janeiro, safe_cast(valor_remuneracao_fevereiro as float64) valor_remuneracao_fevereiro, safe_cast(valor_remuneracao_marco as float64) valor_remuneracao_marco, @@ -82,19 +86,23 @@ select then 'Não identificado' when 
tipo_estabelecimento = 'CEI/CNO' then 'CEI' - else tipo_estabelecimento + else safe_cast(regexp_replace(tipo_estabelecimento, r'^0+', '') as string) end as tipo_estabelecimento, safe_cast(natureza_juridica as string) natureza_juridica, safe_cast(indicador_simples as string) indicador_simples, - cast(cast(regexp_replace(bairros_sp, r'^0+', '') as int64) as string) as bairros_sp, - cast( - cast(regexp_replace(distritos_sp, r'^0+', '') as int64) as string - ) as distritos_sp, - cast( - cast(regexp_replace(bairros_fortaleza, r'^0+', '') as int64) as string + trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string)) as bairros_sp, + trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp, + trim( + safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) ) as bairros_fortaleza, - cast(cast(regexp_replace(bairros_rj, r'^0+', '') as int64) as string) as bairros_rj, - cast( - cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as int64) as string - ) as regioes_administrativas_df, + trim( + nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') + ) as bairros_rj, + trim( + safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string) + ) as regioes_administrativas_df from `basedosdados-staging.br_me_rais_staging.microdados_vinculos` +{% if is_incremental() %} + where + safe_cast(ano as int64) > (select safe_cast(max(ano) as int64) from {{ this }}) +{% endif %} diff --git a/models/br_me_rais/code/rais_estabelecimento.ipynb b/models/br_me_rais/code/rais_estabelecimento.ipynb new file mode 100755 index 00000000..100937a3 --- /dev/null +++ b/models/br_me_rais/code/rais_estabelecimento.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "import basedosdados as bd\n", + "import numpy as np\n", + "from datetime import datetime\n", + "from os.path import join\n", + 
"from pathlib import Path\n", + "from typing import Any, Dict, List, Optional, Tuple, Union\n", + "pd.set_option(\"display.max_columns\", None)\n", + "\n", + "def to_partitions(\n", + " data: pd.DataFrame,\n", + " partition_columns: List[str],\n", + " savepath: str,\n", + " file_type: str = \"csv\",\n", + "):\n", + " \"\"\"Save data in to hive patitions schema, given a dataframe and a list of partition columns.\n", + " Args:\n", + " data (pandas.core.frame.DataFrame): Dataframe to be partitioned.\n", + " partition_columns (list): List of columns to be used as partitions.\n", + " savepath (str, pathlib.PosixPath): folder path to save the partitions.\n", + " file_type (str): default to csv. Accepts parquet.\n", + " Exemple:\n", + " data = {\n", + " \"ano\": [2020, 2021, 2020, 2021, 2020, 2021, 2021,2025],\n", + " \"mes\": [1, 2, 3, 4, 5, 6, 6,9],\n", + " \"sigla_uf\": [\"SP\", \"SP\", \"RJ\", \"RJ\", \"PR\", \"PR\", \"PR\",\"PR\"],\n", + " \"dado\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\",'h'],\n", + " }\n", + " to_partitions(\n", + " data=pd.DataFrame(data),\n", + " partition_columns=['ano','mes','sigla_uf'],\n", + " savepath='partitions/',\n", + " )\n", + " \"\"\"\n", + "\n", + " if isinstance(data, (pd.core.frame.DataFrame)):\n", + " savepath = Path(savepath)\n", + " # create unique combinations between partition columns\n", + " unique_combinations = (\n", + " data[partition_columns]\n", + " # .astype(str)\n", + " .drop_duplicates(subset=partition_columns).to_dict(orient=\"records\")\n", + " )\n", + "\n", + " for filter_combination in unique_combinations:\n", + " patitions_values = [\n", + " f\"{partition}={value}\"\n", + " for partition, value in filter_combination.items()\n", + " ]\n", + "\n", + " # get filtered data\n", + " df_filter = data.loc[\n", + " data[filter_combination.keys()]\n", + " .isin(filter_combination.values())\n", + " .all(axis=1),\n", + " :,\n", + " ]\n", + " df_filter = df_filter.drop(columns=partition_columns)\n", + "\n", + " # 
create folder tree\n", + " filter_save_path = Path(savepath / \"/\".join(patitions_values))\n", + " filter_save_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if file_type == \"csv\":\n", + " # append data to csv\n", + " file_filter_save_path = Path(filter_save_path) / \"data.csv\"\n", + " df_filter.to_csv(\n", + " file_filter_save_path,\n", + " sep=\",\",\n", + " encoding=\"utf-8\",\n", + " na_rep=\"\",\n", + " index=False,\n", + " mode=\"a\",\n", + " header=not file_filter_save_path.exists(),\n", + " )\n", + " elif file_type == \"parquet\":\n", + " # append data to parquet\n", + " file_filter_save_path = Path(filter_save_path) / \"data.parquet\"\n", + " df_filter.to_parquet(\n", + " file_filter_save_path, index=False, compression=\"gzip\"\n", + " )\n", + " else:\n", + " raise BaseException(\"Data need to be a pandas DataFrame\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading: 100%|██████████| 5570/5570 [00:00<00:00, 6164.87rows/s]\n", + "Downloading: 100%|██████████| 27/27 [00:00<00:00, 86.26rows/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "natureza\n", + "subatividade_ibge\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\n", + " \"X:\\\\dados\\\\br_me_rais\\\\estabelecimento_2023\\\\RAIS_ESTAB_PUB.txt\",\n", + " encoding=\"latin1\",\n", + " sep=\";\",\n", + " dtype=str,\n", + ")\n", + "\n", + "df.rename(columns={\n", + " 'Bairros SP' : 'bairros_sp',\n", + " 'Bairros Fortaleza' : 'bairros_fortaleza',\n", + " 'Bairros RJ' : 'bairros_rj',\n", + " 'CNAE 2.0 Classe' : 'cnae_2',\n", + " 'CNAE 95 Classe' : 'cnae_1',\n", + " 'Distritos SP' : 'distritos_sp',\n", + " 'Qtd Vínculos CLT' : 'quantidade_vinculos_clt',\n", + " 'Qtd Vínculos Ativos' : 'quantidade_vinculos_ativos',\n", + " 'Qtd Vínculos Estatutários' : 'quantidade_vinculos_estatutarios',\n", + " 'Ind Atividade Ano' : 
'indicador_atividade_ano',\n", + " 'Ind CEI Vinculado' : 'indicador_cei_vinculado',\n", + " 'Ind Estab Participa PAT' : 'indicador_pat',\n", + " 'Ind Rais Negativa' : 'indicador_rais_negativa',\n", + " 'Ind Simples' : 'indicador_simples',\n", + " 'Município' : 'municipio',\n", + " 'Natureza Jurídica' : 'natureza_juridica',\n", + " 'Regiões Adm DF' : 'regioes_administrativas_df',\n", + " 'CNAE 2.0 Subclasse' : 'cnae_2_subclasse',\n", + " 'Tamanho Estabelecimento' : 'tamanho',\n", + " 'Tipo Estab' : 'tipo',\n", + " 'UF' : 'uf',\n", + " 'IBGE Subsetor' : 'subsetor_ibge',\n", + " 'CEP Estab' : 'cep',\n", + " }, inplace=True)\n", + "\n", + "df['ano'] = 2023\n", + "\n", + "df['municipio'] = df['municipio'].astype(str)\n", + "\n", + "# Carregar os arquivos\n", + "\n", + "df_municipio = bd.read_sql('SELECT id_municipio, id_municipio_6 FROM `basedosdados.br_bd_diretorios_brasil.municipio`', billing_project_id='basedosdados', reauth=False)\n", + "df_uf = bd.read_sql('SELECT id_uf, sigla FROM `basedosdados.br_bd_diretorios_brasil.uf`', billing_project_id='basedosdados', reauth=False)\n", + "\n", + "# Mescla com o arquivo de municípios\n", + "df = pd.merge(df, df_municipio, left_on='municipio', right_on='id_municipio_6', how='left')\n", + "df.drop(['id_municipio_6', 'municipio'], axis=1, inplace=True)\n", + "\n", + "# Gerar a sigla_uf\n", + "\n", + "# Mescla com o arquivo de UFs\n", + "df['uf'] = df['uf'].astype(str)\n", + "df = pd.merge(df, df_uf, left_on='uf', right_on='id_uf', how='left')\n", + "df = df.drop(['id_uf', 'uf'], axis=1)\n", + "df = df.rename(columns={'sigla': 'sigla_uf'})\n", + "\n", + "\n", + "# Substitui sigla_uf vazia por \"IGNORADO\"\n", + "df['sigla_uf'].replace(np.nan, \"IGNORADO\", inplace=True)\n", + "\n", + "# Padronização das variáveis e dados\n", + "for col in df.columns:\n", + " if df[col].dtype == 'str':\n", + " df[col] = df[col].str.strip()\n", + " df[col].replace([\"{ñ\", \"{ñ class}\", \"{ñ c\"], \"\", inplace=True)\n", + "\n", + "# Lista de 
variáveis\n", + "vars_list = [\n", + " 'ano',\n", + " 'sigla_uf',\n", + " 'id_municipio',\n", + " 'quantidade_vinculos_ativos',\n", + " 'quantidade_vinculos_clt',\n", + " 'quantidade_vinculos_estatutarios',\n", + " 'natureza',\n", + " 'natureza_juridica',\n", + " 'tamanho',\n", + " 'tipo',\n", + " 'indicador_cei_vinculado',\n", + " 'indicador_pat',\n", + " 'indicador_simples',\n", + " 'indicador_rais_negativa',\n", + " 'indicador_atividade_ano',\n", + " 'cnae_1',\n", + " 'cnae_2',\n", + " 'cnae_2_subclasse',\n", + " 'subsetor_ibge',\n", + " 'subatividade_ibge',\n", + " 'cep',\n", + " 'bairros_sp',\n", + " 'distritos_sp',\n", + " 'bairros_fortaleza',\n", + " 'bairros_rj',\n", + " 'regioes_administrativas_df'\n", + "]\n", + "\n", + "# Gera as variáveis não confirmadas\n", + "for var in vars_list:\n", + " if var not in df.columns:\n", + " print(var)\n", + " df[var] = \"\"\n", + "\n", + "# Limpeza adicional de variáveis\n", + "for col in df.columns:\n", + " if df[col].dtype == 'str':\n", + " print(col)\n", + " df[col] = df[col].str.strip()\n", + " df[col].replace([\"{ñ\", \"{ñ class}\", \"{ñ c\", \"{ñ clas}\"], \"\", inplace=True)\n", + "\n", + "# Limpeza para variáveis específicas\n", + "for col in ['bairros_sp', 'distritos_sp', 'bairros_fortaleza', 'bairros_rj', 'distritos_sp', 'regioes_administrativas_df', 'cnae_2', 'cnae_2_subclasse', 'subsetor_ibge', 'subatividade_ibge']:\n", + " df[col].replace([\"0000\", \"00000\", \"000000\", \"0000000\", \"0000-1\", \"000-1\", \"998\", \"999\", \"9999\", \"9997\", \"00\", \"-1\"], \"\", inplace=True)\n", + "\n", + "# Limpeza de natureza_juridica e cep\n", + "df['natureza_juridica'].replace([\"9990\", \"9999\"], \"\", inplace=True)\n", + "df['cep'].replace(\"0\", \"\", inplace=True)\n", + "\n", + "# Ajuste na variável tipo\n", + "df['tipo'].replace([\"CNPJ\", \"Cnpj\", \"01\", \"1\"], \"1\", inplace=True)\n", + "df['tipo'].replace([\"CAEPF\", \"Caepf\"], \"2\", inplace=True)\n", + "df['tipo'].replace([\"CEI\", \"Cei\", 
\"CEI/CNO\", \"Cei/Cno\", \"CNO\", \"Cno\", \"03\", \"3\"], \"3\", inplace=True)\n", + "\n", + "# Converte colunas para numérico\n", + "cols_to_numeric = ['id_municipio', 'quantidade_vinculos_ativos', 'quantidade_vinculos_clt', 'quantidade_vinculos_estatutarios', 'tamanho', 'indicador_cei_vinculado', 'indicador_pat', 'indicador_simples', 'indicador_rais_negativa', 'indicador_atividade_ano']\n", + "df[cols_to_numeric] = df[cols_to_numeric].apply(pd.to_numeric, errors='coerce')\n", + "\n", + "# Reordena as colunas\n", + "df = df[vars_list]\n", + "\n", + "to_partitions(\n", + " data=df,\n", + " partition_columns=[\"ano\", \"sigla_uf\"],\n", + " savepath=\"X:\\\\dados\\\\br_me_rais\\\\estabelecimento_2023\\\\estabelecimento\",\n", + " file_type=\"csv\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_me_rais/code/rais_vinculo.ipynb b/models/br_me_rais/code/rais_vinculo.ipynb new file mode 100755 index 00000000..4ff17521 --- /dev/null +++ b/models/br_me_rais/code/rais_vinculo.ipynb @@ -0,0 +1,452 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from os.path import join\n", + "from pathlib import Path\n", + "from typing import Any, Dict, List, Optional, Tuple, Union\n", + "import basedosdados as bd # type: ignore\n", + "import numpy as np\n", + "import pandas as pd\n", + "import gc\n", + "import tqdm\n", + "\n", + "\n", + "def to_partitions(\n", + " data: pd.DataFrame,\n", + " partition_columns: List[str],\n", + " savepath: str,\n", + " file_type: str = 
\"csv\",\n", + "):\n", + " \"\"\"Save data in to hive patitions schema, given a dataframe and a list of partition columns.\n", + " Args:\n", + " data (pandas.core.frame.DataFrame): Dataframe to be partitioned.\n", + " partition_columns (list): List of columns to be used as partitions.\n", + " savepath (str, pathlib.PosixPath): folder path to save the partitions.\n", + " file_type (str): default to csv. Accepts parquet.\n", + " Exemple:\n", + " data = {\n", + " \"ano\": [2020, 2021, 2020, 2021, 2020, 2021, 2021,2025],\n", + " \"mes\": [1, 2, 3, 4, 5, 6, 6,9],\n", + " \"sigla_uf\": [\"SP\", \"SP\", \"RJ\", \"RJ\", \"PR\", \"PR\", \"PR\",\"PR\"],\n", + " \"dado\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\",'h'],\n", + " }\n", + " to_partitions(\n", + " data=pd.DataFrame(data),\n", + " partition_columns=['ano','mes','sigla_uf'],\n", + " savepath='partitions/',\n", + " )\n", + " \"\"\"\n", + "\n", + " if isinstance(data, (pd.core.frame.DataFrame)):\n", + " savepath = Path(savepath)\n", + " # create unique combinations between partition columns\n", + " unique_combinations = (\n", + " data[partition_columns]\n", + " # .astype(str)\n", + " .drop_duplicates(subset=partition_columns).to_dict(orient=\"records\")\n", + " )\n", + "\n", + " for filter_combination in unique_combinations:\n", + " patitions_values = [\n", + " f\"{partition}={value}\"\n", + " for partition, value in filter_combination.items()\n", + " ]\n", + "\n", + " # get filtered data\n", + " df_filter = data.loc[\n", + " data[filter_combination.keys()]\n", + " .isin(filter_combination.values())\n", + " .all(axis=1),\n", + " :,\n", + " ]\n", + " df_filter = df_filter.drop(columns=partition_columns)\n", + "\n", + " # create folder tree\n", + " filter_save_path = Path(savepath / \"/\".join(patitions_values))\n", + " filter_save_path.mkdir(parents=True, exist_ok=True)\n", + "\n", + " if file_type == \"csv\":\n", + " # append data to csv\n", + " file_filter_save_path = Path(filter_save_path) / \"data.csv\"\n", 
+ " df_filter.to_csv(\n", + " file_filter_save_path,\n", + " sep=\",\",\n", + " encoding=\"utf-8\",\n", + " na_rep=\"\",\n", + " index=False,\n", + " mode=\"a\",\n", + " header=not file_filter_save_path.exists(),\n", + " )\n", + " elif file_type == \"parquet\":\n", + " # append data to parquet\n", + " file_filter_save_path = Path(filter_save_path) / \"data.parquet\"\n", + " df_filter.to_parquet(\n", + " file_filter_save_path, index=False, compression=\"gzip\"\n", + " )\n", + " else:\n", + " raise BaseException(\"Data need to be a pandas DataFrame\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option(\"display.max_columns\", None)\n", + "\n", + "chucks = []\n", + "\n", + "\n", + "df_municipio = bd.read_sql(\n", + " \"SELECT id_municipio, id_municipio_6, sigla_uf FROM `basedosdados.br_bd_diretorios_brasil.municipio`\",\n", + " billing_project_id=\"basedosdados\",\n", + " reauth=False,\n", + ")\n", + "\n", + "valor = 0\n", + "\n", + "for chunk in tqdm.tqdm(\n", + " pd.read_csv(\n", + " \"X:\\\\dados\\\\br_me_rais\\\\vinculos_2023\\\\RAIS_VINC_PUB_SUL.txt\",\n", + " sep=\";\",\n", + " encoding=\"latin1\",\n", + " low_memory=False,\n", + " decimal=\",\",\n", + " chunksize=100000,\n", + " )\n", + "):\n", + " valor = valor + 1\n", + " print(f\"Quantidade: {valor}\")\n", + "\n", + " ints = chunk.select_dtypes(include=[\"int64\", \"int32\", \"int16\"]).columns\n", + " chunk[ints] = chunk[ints].apply(pd.to_numeric, downcast=\"integer\")\n", + "\n", + " floats = chunk.select_dtypes(include=[\"float\"]).columns\n", + " chunk[floats] = chunk[floats].apply(pd.to_numeric, downcast=\"float\")\n", + "\n", + " objects = chunk.select_dtypes(\"object\").columns\n", + " chunk[objects] = chunk[objects].apply(lambda x: x.astype(\"category\"))\n", + "\n", + " chunk.rename(\n", + " columns={\n", + " \"Tipo Vínculo\": \"tipo_vinculo\",\n", + " \"Vínculo Ativo 31/12\": \"vinculo_ativo_3112\",\n", + " \"Tipo 
Admissão\": \"tipo_admissao\",\n", + " \"Mês Admissão\": \"mes_admissao\",\n", + " \"Mês Desligamento\": \"mes_desligamento\",\n", + " \"Motivo Desligamento\": \"motivo_desligamento\",\n", + " \"Causa Afastamento 1\": \"causa_desligamento_1\",\n", + " \"Causa Afastamento 2\": \"causa_desligamento_2\",\n", + " \"Causa Afastamento 3\": \"causa_desligamento_3\",\n", + " \"Faixa Tempo Emprego\": \"faixa_tempo_emprego\",\n", + " \"Tempo Emprego\": \"tempo_emprego\",\n", + " \"Faixa Hora Contrat\": \"faixa_horas_contratadas\",\n", + " \"Qtd Hora Contr\": \"quantidade_horas_contratadas\",\n", + " \"Mun Trab\": \"id_municipio_trabalho\",\n", + " \"Qtd Dias Afastamento\": \"quantidade_dias_afastamento\",\n", + " \"Ind CEI Vinculado\": \"indicador_cei_vinculado\",\n", + " \"Ind Trab Parcial\": \"indicador_trabalho_parcial\",\n", + " \"Ind Trab Intermitente\": \"indicador_trabalho_intermitente\",\n", + " \"Faixa Remun Média (SM)\": \"faixa_remuneracao_media_sm\",\n", + " \"Vl Remun Média (SM)\": \"valor_remuneracao_media_sm\",\n", + " \"Vl Remun Média Nom\": \"valor_remuneracao_media\",\n", + " \"Faixa Remun Dezem (SM)\": \"faixa_remuneracao_dezembro_sm\",\n", + " \"Vl Remun Dezembro (SM)\": \"valor_remuneracao_dezembro_sm\",\n", + " \"Vl Rem Janeiro SC\": \"valor_remuneracao_janeiro\",\n", + " \"Vl Rem Fevereiro SC\": \"valor_remuneracao_fevereiro\",\n", + " \"Vl Rem Março SC\": \"valor_remuneracao_marco\",\n", + " \"Vl Rem Abril SC\": \"valor_remuneracao_abril\",\n", + " \"Vl Rem Maio SC\": \"valor_remuneracao_maio\",\n", + " \"Vl Rem Junho SC\": \"valor_remuneracao_junho\",\n", + " \"Vl Rem Julho SC\": \"valor_remuneracao_julho\",\n", + " \"Vl Rem Agosto SC\": \"valor_remuneracao_agosto\",\n", + " \"Vl Rem Setembro SC\": \"valor_remuneracao_setembro\",\n", + " \"Vl Rem Outubro SC\": \"valor_remuneracao_outubro\",\n", + " \"Vl Rem Novembro SC\": \"valor_remuneracao_novembro\",\n", + " \"Vl Remun Dezembro Nom\": \"valor_remuneracao_dezembro\",\n", + " \"CBO Ocupação 2002\": 
\"cbo_2002\",\n", + " \"Faixa Etária\": \"faixa_etaria\",\n", + " \"Idade\": \"idade\",\n", + " \"Escolaridade após 2005\": \"grau_instrucao_apos_2005\",\n", + " \"Nacionalidade\": \"nacionalidade\",\n", + " \"Sexo Trabalhador\": \"sexo\",\n", + " \"Raça Cor\": \"raca_cor\",\n", + " \"Ind Portador Defic\": \"indicador_portador_deficiencia\",\n", + " \"Tipo Defic\": \"tipo_deficiencia\",\n", + " \"Ano Chegada Brasil\": \"ano_chegada_brasil\",\n", + " \"IBGE Subsetor\": \"subsetor_ibge\",\n", + " \"CNAE 95 Classe\": \"cnae_1\",\n", + " \"CNAE 2.0 Classe\": \"cnae_2\",\n", + " \"CNAE 2.0 Subclasse\": \"cnae_2_subclasse\",\n", + " \"Tamanho Estabelecimento\": \"tamanho_estabelecimento\",\n", + " \"Tipo Estab\": \"tipo_estabelecimento\",\n", + " \"Natureza Jurídica\": \"natureza_juridica\",\n", + " \"Ind Simples\": \"indicador_simples\",\n", + " \"Bairros SP\": \"bairros_sp\",\n", + " \"Distritos SP\": \"distritos_sp\",\n", + " \"Bairros Fortaleza\": \"bairros_fortaleza\",\n", + " \"Bairros RJ\": \"bairros_rj\",\n", + " \"Regiões Adm DF\": \"regioes_administrativas_df\",\n", + " \"Município\": \"municipio\",\n", + " },\n", + " inplace=True,\n", + " )\n", + "\n", + " chunk[\"ano\"] = 2023\n", + "\n", + " chunk[[\"municipio\", \"id_municipio_trabalho\"]] = chunk[\n", + " [\"municipio\", \"id_municipio_trabalho\"]\n", + " ].astype(str)\n", + "\n", + " # Mescla com o arquivo de municípios\n", + "\n", + " chunk = pd.merge(\n", + " chunk,\n", + " df_municipio,\n", + " left_on=[\"municipio\"],\n", + " right_on=[\"id_municipio_6\"],\n", + " how=\"left\",\n", + " )\n", + "\n", + " chunk = pd.merge(\n", + " chunk,\n", + " df_municipio,\n", + " left_on=[\"id_municipio_trabalho\"],\n", + " right_on=[\"id_municipio_6\"],\n", + " how=\"left\",\n", + " )\n", + "\n", + " chunk.drop(\n", + " [\n", + " \"id_municipio_trabalho\",\n", + " \"municipio\",\n", + " \"id_municipio_6_x\",\n", + " \"id_municipio_6_y\",\n", + " \"sigla_uf_y\",\n", + " ],\n", + " axis=1,\n", + " inplace=True,\n", + 
" )\n", + "\n", + " chunk.rename(\n", + " columns={\n", + " \"id_municipio_x\": \"id_municipio\",\n", + " \"sigla_uf_x\": \"sigla_uf\",\n", + " \"id_municipio_y\": \"id_municipio_trabalho\",\n", + " },\n", + " inplace=True,\n", + " )\n", + "\n", + " chunk[\"sigla_uf\"].replace([np.nan, \"NI\"], \"IGNORADO\", inplace=True)\n", + "\n", + " vars_list = [\n", + " \"ano\",\n", + " \"sigla_uf\",\n", + " \"id_municipio\",\n", + " \"tipo_vinculo\",\n", + " \"vinculo_ativo_3112\",\n", + " \"tipo_admissao\",\n", + " \"mes_admissao\",\n", + " \"mes_desligamento\",\n", + " \"motivo_desligamento\",\n", + " \"causa_desligamento_1\",\n", + " \"causa_desligamento_2\",\n", + " \"causa_desligamento_3\",\n", + " \"faixa_tempo_emprego\",\n", + " \"tempo_emprego\",\n", + " \"faixa_horas_contratadas\",\n", + " \"quantidade_horas_contratadas\",\n", + " \"id_municipio_trabalho\",\n", + " \"quantidade_dias_afastamento\",\n", + " \"indicador_cei_vinculado\",\n", + " \"indicador_trabalho_parcial\",\n", + " \"indicador_trabalho_intermitente\",\n", + " \"faixa_remuneracao_media_sm\",\n", + " \"valor_remuneracao_media_sm\",\n", + " \"valor_remuneracao_media\",\n", + " \"faixa_remuneracao_dezembro_sm\",\n", + " \"valor_remuneracao_dezembro_sm\",\n", + " \"valor_remuneracao_janeiro\",\n", + " \"valor_remuneracao_fevereiro\",\n", + " \"valor_remuneracao_marco\",\n", + " \"valor_remuneracao_abril\",\n", + " \"valor_remuneracao_maio\",\n", + " \"valor_remuneracao_junho\",\n", + " \"valor_remuneracao_julho\",\n", + " \"valor_remuneracao_agosto\",\n", + " \"valor_remuneracao_setembro\",\n", + " \"valor_remuneracao_outubro\",\n", + " \"valor_remuneracao_novembro\",\n", + " \"valor_remuneracao_dezembro\",\n", + " \"tipo_salario\",\n", + " \"valor_salario_contratual\",\n", + " \"subatividade_ibge\",\n", + " \"subsetor_ibge\",\n", + " \"cbo_1994\",\n", + " \"cbo_2002\",\n", + " \"cnae_1\",\n", + " \"cnae_2\",\n", + " \"cnae_2_subclasse\",\n", + " \"faixa_etaria\",\n", + " \"idade\",\n", + " 
\"grau_instrucao_1985_2005\",\n", + " \"grau_instrucao_apos_2005\",\n", + " \"nacionalidade\",\n", + " \"sexo\",\n", + " \"raca_cor\",\n", + " \"indicador_portador_deficiencia\",\n", + " \"tipo_deficiencia\",\n", + " \"ano_chegada_brasil\",\n", + " \"tamanho_estabelecimento\",\n", + " \"tipo_estabelecimento\",\n", + " \"natureza_juridica\",\n", + " \"indicador_simples\",\n", + " \"bairros_sp\",\n", + " \"distritos_sp\",\n", + " \"bairros_fortaleza\",\n", + " \"bairros_rj\",\n", + " \"regioes_administrativas_df\",\n", + " ]\n", + "\n", + " for var in vars_list:\n", + "\n", + " if var not in chunk.columns:\n", + " chunk[var] = \"\"\n", + "\n", + " # Limpeza de variáveis\n", + " chunk = chunk.applymap(lambda x: x.strip() if isinstance(x, str) else x)\n", + "\n", + " # Limpeza de códigos inválidos\n", + "\n", + " invalid_codes_bairros = [\n", + " \"0000\",\n", + " \"00000\",\n", + " \"000000\",\n", + " \"0000000\",\n", + " \"0000-1\",\n", + " \"000-1\",\n", + " \"9999\",\n", + " \"9997\",\n", + " ]\n", + " for col in [\n", + " \"bairros_rj\",\n", + " \"bairros_sp\",\n", + " \"bairros_fortaleza\",\n", + " \"distritos_sp\",\n", + " \"regioes_administrativas_df\",\n", + " ]:\n", + "\n", + " chunk[col].replace(invalid_codes_bairros, \"\", inplace=True)\n", + "\n", + " # Mais substituições de códigos\n", + "\n", + " invalid_codes_general = [\"0000\", \"00000\", \"000000\", \"0000000\", \"0000-1\", \"000-1\"]\n", + "\n", + " for col in [\n", + " \"cbo_1994\",\n", + " \"cbo_2002\",\n", + " \"cnae_1\",\n", + " \"cnae_2\",\n", + " \"cnae_2_subclasse\",\n", + " \"ano_chegada_brasil\",\n", + " ]:\n", + "\n", + " chunk[col].replace(invalid_codes_general, \"\", inplace=True)\n", + "\n", + " chunk[\"mes_admissao\"].replace(\"00\", \"\", inplace=True)\n", + "\n", + " chunk[\"mes_desligamento\"].replace(\"00\", \"\", inplace=True)\n", + "\n", + " chunk[\"motivo_desligamento\"].replace(\"0\", \"\", inplace=True)\n", + "\n", + " chunk[\"causa_desligamento_1\"].replace(\"99\", \"\", 
inplace=True)\n", + "\n", + " chunk[\"raca_cor\"].replace(\"99\", \"9\", inplace=True)\n", + "\n", + " # Ajustes adicionais\n", + "\n", + " chunk[\"natureza_juridica\"].replace([\"9990\", \"9999\"], \"\", inplace=True)\n", + "\n", + " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\n", + " [\"CNPJ\", \"Cnpj\", \"01\", \"1\"], \"1\"\n", + " )\n", + "\n", + " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\"CAEPF\", \"2\")\n", + "\n", + " chunk[\"tipo_estabelecimento\"] = chunk[\"tipo_estabelecimento\"].replace(\n", + " [\"CEI\", \"Cei\", \"CEI/CNO\", \"Cei/Cno\", \"CNO\", \"Cno\", \"03\", \"3\"], \"3\"\n", + " )\n", + "\n", + " # Conversão de valores monetários\n", + "\n", + " monetary_vars = [\n", + " \"valor_remuneracao_janeiro\",\n", + " \"valor_remuneracao_fevereiro\",\n", + " \"valor_remuneracao_marco\",\n", + " \"valor_remuneracao_abril\",\n", + " \"valor_remuneracao_maio\",\n", + " \"valor_remuneracao_junho\",\n", + " \"valor_remuneracao_julho\",\n", + " \"valor_remuneracao_agosto\",\n", + " \"valor_remuneracao_setembro\",\n", + " \"valor_remuneracao_outubro\",\n", + " \"valor_remuneracao_novembro\",\n", + " \"valor_remuneracao_dezembro\",\n", + " \"valor_salario_contratual\",\n", + " \"valor_remuneracao_dezembro_sm\",\n", + " \"valor_remuneracao_media\",\n", + " \"valor_remuneracao_media_sm\",\n", + " ]\n", + "\n", + " for var in monetary_vars:\n", + "\n", + " chunk[var] = chunk[var].astype(str)\n", + "\n", + " chunk[var] = chunk[var].str.replace(\",\", \".\")\n", + "\n", + " chunk = chunk[vars_list]\n", + "\n", + " print(\"Particionando...\")\n", + "\n", + " to_partitions(\n", + " data=chunk,\n", + " partition_columns=[\"ano\", \"sigla_uf\"],\n", + " savepath=\"X:\\\\dados\\\\br_me_rais\\\\vinculos_2023\\\\vinculos\",\n", + " file_type=\"csv\",\n", + " )\n", + "\n", + " del chunk\n", + "\n", + " gc.collect()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": 
"python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/models/br_me_rais/schema.yml b/models/br_me_rais/schema.yml index 863e0cca..b58f2f05 100644 --- a/models/br_me_rais/schema.yml +++ b/models/br_me_rais/schema.yml @@ -135,39 +135,39 @@ models: description: Indicador Trabalho Parcial - name: indicador_trabalho_intermitente description: Indicador Trabalho Intermitente - - name: faixa_remun_media_sm + - name: faixa_remuneracao_media_sm description: Faixa Remuneração Média (Salários Mínimos) - - name: valor_remun_media_sm + - name: valor_remuneracao_media_sm description: Valor da Remuneração Média (Salários Mínimos) - - name: valor_remun_media_nominal + - name: valor_remuneracao_media description: Valor da Remuneração Média (Nominal) - - name: faixa_remun_dezembro_sm + - name: faixa_remuneracao_dezembro_sm description: Faixa Remuneração em Dezembro (Salários Mínimos) - - name: valor_remun_dezembro_sm + - name: valor_remuneracao_dezembro_sm description: Valor da Remuneração em Dezembro (Salários Mínimos) - - name: valor_remun_janeiro_nominal + - name: valor_remuneracao_janeiro description: Valor da Remuneração em Janeiro (Nominal) - - name: valor_remun_fevereiro_nominal + - name: valor_remuneracao_fevereiro description: Valor da Remuneração em Fevereiro (Nominal) - - name: valor_remun_marco_nominal + - name: valor_remuneracao_marco description: Valor da Remuneração em Março (Nominal) - - name: valor_remun_abril_nominal + - name: valor_remuneracao_abril description: Valor da Remuneração em Abril (Nominal) - - name: valor_remun_maio_nominal + - name: valor_remuneracao_maio description: Valor da Remuneração em Maio (Nominal) - - name: valor_remun_junho_nominal + - name: 
valor_remuneracao_junho description: Valor da Remuneração em Junho (Nominal) - - name: valor_remun_julho_nominal + - name: valor_remuneracao_julho description: Valor da Remuneração em Julho (Nominal) - - name: valor_remun_agosto_nominal + - name: valor_remuneracao_agosto description: Valor da Remuneração em Agosto (Nominal) - - name: valor_remun_setembro_nominal + - name: valor_remuneracao_setembro description: Valor da Remuneração em Setembro (Nominal) - - name: valor_remun_outubro_nominal + - name: valor_remuneracao_outubro description: Valor da Remuneração em Outubro (Nominal) - - name: valor_remun_novembro_nominal + - name: valor_remuneracao_novembro description: Valor da Remuneração em Novembro (Nominal) - - name: valor_remun_dezembro_nominal + - name: valor_remuneracao_dezembro description: Valor da Remuneração em Dezembro (Nominal) - name: tipo_salario description: Tipo do Salário @@ -220,7 +220,7 @@ models: tests: - relationships: to: ref('br_bd_diretorios_brasil__subatividade_ibge') - field: subatividade_ibge.subatividade_ibge + field: id_subatividade - name: subsetor_ibge description: Subsetor - IBGE - name: cnae_1 @@ -235,11 +235,9 @@ models: - name: cnae_2_subclasse description: Classificação Nacional de Atividades Econômicas (CNAE) 2.0 Subclasse tests: - - custom_relationships: - tags: [cnae] - to: ref('br_bd_diretorios_brasil__cnae_2') - field: subclasse - ignore_values: ['9999997', '8630505', '3312101', '9999999', 00000-1] + - relationships: + to: ref('br_bd_diretorios_brasil__cnae_2_subclasse') + field: cnae_2_subclasse.cnae_2_subclasse - name: tamanho_estabelecimento description: Tamanho do Estabelecimento - name: tipo_estabelecimento @@ -293,7 +291,7 @@ models: - relationships: to: ref('br_bd_diretorios_brasil__municipio') field: id_municipio - - name: quantidade_vinculos_ativo + - name: quantidade_vinculos_ativos description: Estoque de vínculos ativos em 31/12. 
- name: quantidade_vinculos_clt description: Estoque de vínculos, sob o regime CLT e Outros, ativos em 31/12 @@ -377,7 +375,7 @@ models: - name: subatividade_ibge description: Subatividade IBGE tests: - - custom_relationships: + - relationships: to: ref('br_bd_diretorios_brasil__subatividade_ibge') field: id_subatividade - name: cep diff --git a/models/br_ms_cnes/br_ms_cnes__dicionario.sql b/models/br_ms_cnes/br_ms_cnes__dicionario.sql index 8b1159e6..b6b6c255 100644 --- a/models/br_ms_cnes/br_ms_cnes__dicionario.sql +++ b/models/br_ms_cnes/br_ms_cnes__dicionario.sql @@ -5,6 +5,6 @@ select safe_cast(id_tabela as string) id_tabela, safe_cast(nome_coluna as string) nome_coluna, safe_cast(chave as string) chave, - safe_cast(replace(cobertura_temporal, '-1', '(1)') as string) cobertura_temporal, + safe_cast(cobertura_temporal as string) cobertura_temporal, safe_cast(valor as string) valor, from `basedosdados-staging.br_ms_cnes_staging.dicionario` as t diff --git a/models/br_ms_sim/br_ms_sim__dicionario.sql b/models/br_ms_sim/br_ms_sim__dicionario.sql index cd5d5419..41d561d9 100644 --- a/models/br_ms_sim/br_ms_sim__dicionario.sql +++ b/models/br_ms_sim/br_ms_sim__dicionario.sql @@ -1,5 +1,5 @@ {{ config(alias="dicionario", schema="br_ms_sim") }} --- Dicionário de dados do SIM + select safe_cast(id_tabela as string) id_tabela, safe_cast(coluna as string) nome_coluna, diff --git a/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql b/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql new file mode 100644 index 00000000..2c7ac700 --- /dev/null +++ b/models/br_sfb_sicar/br_sfb_sicar__area_imovel.sql @@ -0,0 +1,109 @@ +{{ + config( + alias="area_imovel", + schema="br_sfb_sicar", + materialized="incremental", + partition_by={ + "field": "data_atualizacao_car", + "data_type": "date", + "granularity": "day", + }, + cluster_by=["sigla_uf"], + ) +}} + +with + municipios_car as ( + select distinct + safe_cast(cod_estado as string) sigla_uf, + safe_cast(municipio as string) 
municipio_nome, + + from `basedosdados-staging.br_sfb_sicar_staging.area_imovel` as t + ), + + muncipios_car_diretorio as ( + select sd.*, m.id_municipio as id_municipio + from municipios_car sd + left join + `basedosdados.br_bd_diretorios_brasil.municipio` as m + on lower( + regexp_replace(normalize(sd.municipio_nome, nfd), r"[^a-zA-Z0-9\s]", "") + ) + = lower(regexp_replace(normalize(m.nome, nfd), r"[^a-zA-Z0-9\s]", "")) + and sd.sigla_uf = m.sigla_uf + ), + correcao_manual_falhas as ( + select + sigla_uf, + municipio_nome, + case + when sigla_uf = 'PE' and municipio_nome = 'Iguaracy' + then '2606903' + when sigla_uf = 'RN' and municipio_nome = 'Januario Cicco' + then '2405306' + when sigla_uf = 'RN' and municipio_nome = "Olho d'Agua do Borges" + then '2408409' + when sigla_uf = 'PA' and municipio_nome = "Santa Izabel do Para" + then '1506500' + when sigla_uf = 'SP' and municipio_nome = "Florinea" + then '3516101' + when sigla_uf = 'SP' and municipio_nome = "Sao Luiz do Paraitinga" + then '3550001' + when sigla_uf = 'SP' and municipio_nome = "Biritiba Mirim" + then '3506607' + when sigla_uf = 'MT' and municipio_nome = "Santo Antonio de Leverger" + then '5107800' + when sigla_uf = 'MT' and municipio_nome = "Poxoreu" + then '5107008' + when sigla_uf = 'BA' and municipio_nome = "Muquem do Sao Francisco" + then '2922250' + when sigla_uf = 'MG' and municipio_nome = "Passa Vinte" + then '3147808' + when sigla_uf = 'SE' and municipio_nome = "Amparo do Sao Francisco" + then '2800100' + when sigla_uf = 'BA' and municipio_nome = "Santa Terezinha" + then '2928505' + when sigla_uf = 'TO' and municipio_nome = "Tabocao" + then '1708254' + when sigla_uf = 'MG' and municipio_nome = "Dona Euzebia" + then '3122900' + when sigla_uf = 'MG' and municipio_nome = "Sao Tome das Letras" + then '3165206' + when sigla_uf = 'SC' and municipio_nome = "Grao-Para" + then '4206108' + when sigla_uf = 'CE' and municipio_nome = "Itapaje" + then '2306306' + else id_municipio + end as id_municipio + 
from muncipios_car_diretorio + ), + + final_table as ( + select + safe_cast(data_extracao as date) data_extracao, + safe_cast(data_atualizacao_car as date) data_atualizacao_car, + safe_cast(cod_estado as string) sigla_uf, + safe_cast(t2.id_municipio as string) id_municipio, + safe_cast(cod_imovel as string) id_imovel, + safe_cast(mod_fiscal as string) modulos_fiscais, + safe_cast(num_area as float64) area, + safe_cast(ind_status as string) status, + safe_cast(ind_tipo as string) tipo, + safe_cast(des_condic as string) condicao, + safe_cast( + safe.st_geogfromtext(geometry, make_valid => true) as geography + ) geometria, + from `basedosdados-staging.br_sfb_sicar_staging.area_imovel` as car + left join + correcao_manual_falhas as t2 + on car.sigla_uf = t2.sigla_uf + and car.municipio = t2.municipio_nome + + ) + +select * +from final_table + +{% if is_incremental() %} + where data_extracao > (select max(data_extracao) from {{ this }}) +{% endif %} diff --git a/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql b/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql new file mode 100644 index 00000000..92a71f0d --- /dev/null +++ b/models/br_sfb_sicar/br_sfb_sicar__dicionario.sql @@ -0,0 +1,16 @@ +{{ + config( + alias="dicionario", + schema="br_sfb_sicar", + materialized="table", + ) +}} + + +select + safe_cast(id_tabela as string) id_tabela, + safe_cast(nome_coluna as string) nome_coluna, + safe_cast(chave as string) chave, + safe_cast(cobertura_temporal as string) cobertura_temporal, + safe_cast(valor as string) valor, +from `basedosdados-staging.br_sfb_sicar_staging.dicionario` as t diff --git a/models/br_sfb_sicar/schema.yml b/models/br_sfb_sicar/schema.yml new file mode 100644 index 00000000..55dea679 --- /dev/null +++ b/models/br_sfb_sicar/schema.yml @@ -0,0 +1,69 @@ +--- +version: 2 +models: + - name: br_sfb_sicar__area_imovel + description: Esta tabela contém o polígono de demarcação da área dos imóveis com + cadastro ambiental rural. 
+ tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: [id_imovel] + - not_null_proportion_multiple_columns: + at_least: 0.95 + - custom_dictionary_coverage: + columns_covered_by_dictionary: [status, tipo] + dictionary_model: ref('br_sfb_sicar__dicionario') + columns: + - name: data_atualizacao_car + description: Data de atualização dos dados na fonte original. A data de atualização + pode variar a depender da Unidade da Federação. + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__data') + field: data.data + - name: sigla_uf + description: Sigla da Unidade da Federação (UF) onde se localiza o cadastro + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + - name: id_municipio + description: ID Município - IBGE 7 Dígitos onde se localiza o cadastro + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: id_imovel + description: Código de Inscrição no Cadastro Ambiental Rural (CAR) + - name: modulos_fiscais + description: Quantidade de módulos fiscais do imóvel + - name: area + description: Área do imóvel + - name: status + description: Status do cadastro do imóvel + - name: tipo + description: Tipo do Imóvel Rural + - name: condicao + description: Condição em que o cadastro se encontra no fluxo de análise pelo + órgão competente + - name: geometria + description: Geometria do imóvel + - name: br_sfb_sicar__dicionario + description: Dicionário para tradução dos códigos das tabelas do conjunto br_sfb_sicar + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - id_tabela + - nome_coluna + - chave + - cobertura_temporal + columns: + - name: id_tabela + description: ID Tabela + - name: nome_coluna + description: Nome da coluna + - name: chave + description: Chave + - name: cobertura_temporal + description: Cobertura Temporal + - name: valor + description: Valor diff --git
a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql index ea7549c6..6be9f640 100644 --- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql +++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato.sql @@ -6,7 +6,7 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 1945, "end": 2022, "interval": 1}, + "range": {"start": 1945, "end": 2025, "interval": 1}, }, ) }} @@ -15,7 +15,7 @@ select safe_cast(turno as int64) turno, safe_cast(id_eleicao as string) id_eleicao, safe_cast(tipo_eleicao as string) tipo_eleicao, - safe_cast(data_eleicao as string) data_eleicao, + safe_cast(data_eleicao as date) data_eleicao, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, safe_cast(id_municipio_tse as string) id_municipio_tse, diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql index b11e3dab..e6d6f982 100644 --- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql +++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio.sql @@ -6,7 +6,7 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 1998, "end": 2022, "interval": 2}, + "range": {"start": 1994, "end": 2026, "interval": 2}, }, cluster_by=["sigla_uf"], ) @@ -16,7 +16,7 @@ select safe_cast(turno as int64) turno, safe_cast(id_eleicao as string) id_eleicao, safe_cast(tipo_eleicao as string) tipo_eleicao, - safe_cast(data_eleicao as string) data_eleicao, + safe_cast(data_eleicao as date) data_eleicao, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, safe_cast(id_municipio_tse as string) id_municipio_tse, diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql
b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql index 76798436..b2b14d25 100644 --- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql +++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_candidato_municipio_zona.sql @@ -6,7 +6,7 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 1998, "end": 2022, "interval": 2}, + "range": {"start": 1994, "end": 2026, "interval": 2}, }, cluster_by=["sigla_uf"], ) @@ -16,7 +16,7 @@ select safe_cast(turno as int64) turno, safe_cast(id_eleicao as string) id_eleicao, safe_cast(tipo_eleicao as string) tipo_eleicao, - safe_cast(data_eleicao as string) data_eleicao, + safe_cast(data_eleicao as date) data_eleicao, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, safe_cast(id_municipio_tse as string) id_municipio_tse, diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql index 5b13d924..f93eb9fd 100644 --- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql +++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio.sql @@ -6,7 +6,7 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 1994, "end": 2022, "interval": 2}, + "range": {"start": 1994, "end": 2026, "interval": 2}, }, cluster_by=["sigla_uf"], ) @@ -17,7 +17,7 @@ select safe_cast(turno as int64) turno, safe_cast(id_eleicao as string) id_eleicao, safe_cast(tipo_eleicao as string) tipo_eleicao, - safe_cast(data_eleicao as string) data_eleicao, + safe_cast(data_eleicao as date) data_eleicao, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, safe_cast(id_municipio_tse as string) id_municipio_tse, diff --git a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql
b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql index 4dfa9e05..16fe21e2 100644 --- a/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql +++ b/models/br_tse_eleicoes/br_tse_eleicoes__resultados_partido_municipio_zona.sql @@ -6,7 +6,7 @@ partition_by={ "field": "ano", "data_type": "int64", - "range": {"start": 1994, "end": 2022, "interval": 2}, + "range": {"start": 1994, "end": 2026, "interval": 2}, }, cluster_by=["sigla_uf"], ) @@ -17,7 +17,7 @@ select safe_cast(turno as int64) turno, safe_cast(id_eleicao as string) id_eleicao, safe_cast(tipo_eleicao as string) tipo_eleicao, - safe_cast(data_eleicao as string) data_eleicao, + safe_cast(data_eleicao as date) data_eleicao, safe_cast(sigla_uf as string) sigla_uf, safe_cast(id_municipio as string) id_municipio, safe_cast(id_municipio_tse as string) id_municipio_tse, diff --git a/models/br_tse_eleicoes/schema.yml b/models/br_tse_eleicoes/schema.yml index b72411f2..a4a2015b 100644 --- a/models/br_tse_eleicoes/schema.yml +++ b/models/br_tse_eleicoes/schema.yml @@ -1174,7 +1174,7 @@ models: - numero_candidato - nome_candidato - not_null_proportion_multiple_columns: - at_least: 0.95 + at_least: 0.80 columns: - name: ano description: Ano @@ -1186,14 +1186,27 @@ models: description: Cargo - name: data_eleicao description: Data da eleição + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__data') + field: data.data - name: id_candidato_bd description: ID Candidato - Base dos Dados - name: id_eleicao description: ID Eleição - name: id_municipio description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio - name: id_municipio_tse description: ID Município - TSE + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio_tse + ignore_values: ['73709'] - name: nome_candidato description: Nome do candidato - name: numero_candidato @@
-1208,6 +1221,11 @@ models: description: Sigla do partido - name: sigla_uf description: Sigla da unidade da federação + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + ignore_values: [GB, GP, RB] - name: tipo_eleicao description: Tipo da eleição - name: turno @@ -1222,11 +1240,13 @@ models: - ano - turno - id_eleicao + - sigla_uf - id_municipio_tse + - cargo - sequencial_candidato - numero_candidato - not_null_proportion_multiple_columns: - at_least: 0.95 + at_least: 0.80 columns: - name: ano description: Ano @@ -1238,14 +1258,27 @@ models: description: Cargo - name: data_eleicao description: Data da eleição + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__data') + field: data.data - name: id_candidato_bd description: ID Candidato - Base dos Dados - name: id_eleicao description: ID Eleição - name: id_municipio description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio - name: id_municipio_tse description: ID Município - TSE + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio_tse + ignore_values: ['73709'] - name: numero_candidato description: Número do candidato - name: numero_partido @@ -1258,6 +1291,11 @@ models: description: Sigla do partido - name: sigla_uf description: Sigla da unidade da federação + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + ignore_values: [GB, GP, RB] - name: tipo_eleicao description: Tipo da eleição - name: turno @@ -1272,12 +1310,14 @@ models: - ano - turno - id_eleicao + - sigla_uf - id_municipio_tse + - cargo - zona - sequencial_candidato - numero_candidato - not_null_proportion_multiple_columns: - at_least: 0.95 + at_least: 0.80 columns: - name: ano description: Ano @@ -1289,14 +1329,27 @@ models: description: Cargo - name: data_eleicao description: Data da eleição + tests: + - relationships: + 
to: ref('br_bd_diretorios_data_tempo__data') + field: data.data - name: id_candidato_bd description: ID Candidato - Base dos Dados - name: id_eleicao description: ID Eleição - name: id_municipio description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio - name: id_municipio_tse description: ID Município - TSE + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio_tse + ignore_values: ['73709'] - name: numero_candidato description: Número do candidato - name: numero_partido @@ -1309,6 +1362,11 @@ models: description: Sigla do partido - name: sigla_uf description: Sigla da unidade da federação + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + ignore_values: [GB, GP, RB] - name: tipo_eleicao description: Tipo da eleição - name: turno @@ -1383,7 +1441,7 @@ models: - cargo - numero_partido - not_null_proportion_multiple_columns: - at_least: 0.95 + at_least: 0.90 columns: - name: ano description: Ano @@ -1395,18 +1453,36 @@ models: description: Cargo - name: data_eleicao description: Data da eleição + tests: + - relationships: + to: ref('br_bd_diretorios_data_tempo__data') + field: data.data - name: id_eleicao description: ID Eleição - name: id_municipio description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio - name: id_municipio_tse description: ID Município - TSE + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio_tse + ignore_values: ['73709'] - name: numero_partido description: Número do partido - name: sigla_partido description: Sigla do partido - name: sigla_uf description: Sigla da unidade da federação + tests: + - custom_relationships: + to: ref('br_bd_diretorios_brasil__uf') + field: sigla + ignore_values: [GB, GP, RB] - name: tipo_eleicao description: 
Tipo da eleição
       - name: turno
@@ -1428,7 +1504,7 @@ models:
             - cargo
             - numero_partido
       - not_null_proportion_multiple_columns:
-          at_least: 0.95
+          at_least: 0.90
     columns:
       - name: ano
         description: Ano
@@ -1440,18 +1516,36 @@ models:
         description: Cargo
       - name: data_eleicao
         description: Data da eleição
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
       - name: id_eleicao
         description: ID Eleição
       - name: id_municipio
         description: ID Município - IBGE 7 Dígitos
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_brasil__municipio')
+              field: id_municipio
       - name: id_municipio_tse
         description: ID Município - TSE
+        tests:
+          - custom_relationships:
+              to: ref('br_bd_diretorios_brasil__municipio')
+              field: id_municipio_tse
+              ignore_values: ['73709']
       - name: numero_partido
         description: Número do partido
       - name: sigla_partido
         description: Sigla do partido
       - name: sigla_uf
         description: Sigla da unidade da federação
+        tests:
+          - custom_relationships:
+              to: ref('br_bd_diretorios_brasil__uf')
+              field: sigla
+              ignore_values: [GB, GP, RB]
       - name: tipo_eleicao
         description: Tipo da eleição
       - name: turno
diff --git a/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql
new file mode 100644
index 00000000..32c0d22e
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados.sql
@@ -0,0 +1,51 @@
+{{
+    config(
+        schema="br_tse_filiacao_partidaria",
+        alias="microdados",
+        materialized="table",
+        unique_key="registro_filiacao",
+        partition_by={
+            "field": "data_extracao",
+            "data_type": "date",
+        },
+        cluster_by=["sigla_uf"],
+    )
+}}
+-- Casts the staging columns to their published names and types, then keeps a
+-- single row per registro_filiacao: the one with the latest data_extracao.
+with
+    casted as (
+        select
+            safe_cast(sqregistrofiliacao as string) as registro_filiacao,
+            safe_cast(sgpartido as string) as sigla_partido,
+            safe_cast(sigla_uf as string) as sigla_uf,
+            safe_cast(id_municipio as string) as id_municipio,
+            safe_cast(codlocalidadetse as string) as id_municipio_tse,
+            safe_cast(numzona as string) as zona,
+            safe_cast(numsecao as string) as secao,
+            safe_cast(nrtituloeleitor as string) as titulo_eleitor,
+            safe_cast(numcpf as string) as cpf,
+            safe_cast(nmeleitor as string) as nome,
+            safe_cast(nmsocialeleitor as string) as nome_social,
+            safe_cast(tpsexo as string) as sexo,
+            safe_cast(dessituacaoeleitor as string) as situacao_registro,
+            safe_cast(cdmotivodesfiliacao as string) as motivo_desfiliacao,
+            safe_cast(cdmotivocancelamento as string) as motivo_cancelamento,
+            safe_cast(indorigem as string) as indicador_origem,
+            safe_cast(dtfiliacao as date) as data_filiacao,
+            safe_cast(dtdesfiliacao as date) as data_desfiliacao,
+            safe_cast(tscadastrodesfiliacao as date) as data_cadastro_desfiliacao,
+            safe_cast(dtcancelamento as date) as data_cancelamento,
+            safe_cast(dtexclusao as date) as data_exclusao,
+            safe_cast(data_extracao as date) as data_extracao
+        from `basedosdados-staging.br_tse_filiacao_partidaria_staging.microdados`
+    )
+select *
+from casted
+-- `where true` is a no-op; NOTE(review): confirm whether BigQuery still needs a
+-- WHERE/GROUP BY/HAVING next to QUALIFY before dropping it.
+where true
+qualify
+    row_number() over (
+        partition by registro_filiacao order by data_extracao desc
+    ) = 1
diff --git a/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql
new file mode 100644
index 00000000..0edea6d0
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/br_tse_filiacao_partidaria__microdados_antigos.sql
@@ -0,0 +1,27 @@
+{{
+    config(
+        schema="br_tse_filiacao_partidaria",
+        alias="microdados_antigos",
+        materialized="table",
+        cluster_by=["sigla_uf"],
+    )
+}}
+
+select
+    safe_cast(sigla_partido as string) sigla_partido,
+    safe_cast(sigla_uf as string) sigla_uf,
+    safe_cast(id_municipio as string) id_municipio,
+    safe_cast(id_municipio_tse as string) id_municipio_tse,
+    safe_cast(zona as int64) zona,
+    safe_cast(secao as int64) secao,
+    safe_cast(titulo_eleitoral as string) titulo_eleitoral,
+    safe_cast(nome as string)
nome,
+    {{ validate_date_range("data_filiacao", "1980-01-01") }} as data_filiacao,  -- NOTE(review): validate_date_range is defined elsewhere; presumably nulls dates outside the range — confirm
+    safe_cast(situacao_registro as string) situacao_registro,
+    safe_cast(tipo_registro as string) tipo_registro,
+    {{ validate_date_range("data_processamento", "1980-01-01") }} as data_processamento,
+    {{ validate_date_range("data_desfiliacao", "1980-01-01") }} as data_desfiliacao,
+    {{ validate_date_range("data_cancelamento", "1980-01-01") }} as data_cancelamento,
+    {{ validate_date_range("data_regularizacao", "1980-01-01") }} as data_regularizacao,
+    safe_cast(motivo_cancelamento as string) motivo_cancelamento
+from `basedosdados-staging.br_tse_filiacao_partidaria_staging.microdados_antigos`
diff --git a/models/br_tse_filiacao_partidaria/schema.yml b/models/br_tse_filiacao_partidaria/schema.yml
new file mode 100644
index 00000000..0afb543e
--- /dev/null
+++ b/models/br_tse_filiacao_partidaria/schema.yml
@@ -0,0 +1,166 @@
+---
+version: 2
+models:
+  - name: br_tse_filiacao_partidaria__microdados_antigos
+    description: Microdados antigos de filiação partidária do TSE.
+    tests:
+      - custom_not_null_proportion_multiple_columns:
+          at_least: 0.10  # each checked column must be at least 10% non-null
+          ignore_values: [data_regularizacao]  # column names skipped by the test
+    columns:
+      - name: sigla_partido
+        description: Sigla do partido
+      - name: sigla_uf
+        description: Sigla da unidade da federação
+        tests:
+          - custom_relationships:
+              to: ref('br_bd_diretorios_brasil__uf')
+              field: sigla
+              ignore_values: [ZZ]  # NOTE(review): presumably values exempt from the check; custom_relationships is defined elsewhere
+      - name: id_municipio
+        description: ID Município - IBGE 7 Dígitos
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_brasil__municipio')
+              field: id_municipio
+      - name: id_municipio_tse
+        description: ID Município - TSE
+      - name: zona
+        description: Zona eleitoral
+      - name: secao
+        description: Seção eleitoral
+      - name: titulo_eleitoral
+        description: Título eleitoral
+      - name: nome
+        description: Nome
+      - name: data_filiacao
+        description: Data da filiação
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: situacao_registro
+        description: Situação do registro
+      - name: tipo_registro
+        description: Tipo de registro
+      - name: data_processamento
+        description: Data de processamento
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_desfiliacao
+        description: Data de desfiliação
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_cancelamento
+        description: Data de cancelamento
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_regularizacao
+        description: Data de regularização
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: motivo_cancelamento
+        description: Motivo de cancelamento
+  - name: br_tse_filiacao_partidaria__microdados
+    description: Microdados de filiação partidária do TSE.
+    tests:
+      - custom_not_null_proportion_multiple_columns:
+          at_least: 0.55  # each checked column must be at least 55% non-null
+          ignore_values:  # column names skipped by the test
+            - data_desfiliacao
+            - data_exclusao
+            - data_cadastro_desfiliacao
+            - motivo_desfiliacao
+            - motivo_cancelamento
+            - data_cancelamento
+            - nome_social
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns: [registro_filiacao]  # one row per registro_filiacao
+    columns:
+      - name: registro_filiacao
+        description: Sequência do registro de filiação
+      - name: sigla_partido
+        description: Sigla do partido
+      - name: sigla_uf
+        description: Sigla da unidade da federação
+        tests:
+          - custom_relationships:
+              to: ref('br_bd_diretorios_brasil__uf')
+              field: sigla
+              ignore_values: [ZZ]  # NOTE(review): presumably values exempt from the check; custom_relationships is defined elsewhere
+      - name: id_municipio
+        description: ID Município - IBGE 7 Dígitos
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_brasil__municipio')
+              field: id_municipio
+      - name: id_municipio_tse
+        description: ID Município - TSE
+      - name: zona
+        description: Zona eleitoral
+      - name: secao
+        description: Seção eleitoral
+      - name: titulo_eleitor
+        description: Título de eleitor
+      - name: cpf
+        description: Cadastro de pessoa física
+      - name: nome
+        description: Nome do eleitor
+      - name: nome_social
+        description: Nome social do eleitor
+      - name: sexo
+        description: Sexo
+      - name: situacao_registro
+        description: Situação do registro
+      - name: motivo_desfiliacao
+        description: Código do motivo da desfiliação. Não foi encontrada na documentação
+          a tradução deste código
+      - name: motivo_cancelamento
+        description: Código do motivo de cancelamento.
Não foi encontrada na documentação
+          a tradução deste código
+      - name: indicador_origem
+        description: Não foi encontrada uma definição na documentação para este campo
+      - name: data_filiacao
+        description: Data da filiação
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_desfiliacao
+        description: Data de desfiliação
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_cadastro_desfiliacao
+        description: Data do cadastro da desfiliação
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_cancelamento
+        description: Data de cancelamento
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_exclusao
+        description: Data de exclusão
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
+      - name: data_extracao
+        description: Data de extração da linha
+        tests:
+          - relationships:
+              to: ref('br_bd_diretorios_data_tempo__data')
+              field: data.data
diff --git a/tests/generic/custom_null_proportion_multiple_columns.sql b/tests/generic/custom_null_proportion_multiple_columns.sql
new file mode 100644
index 00000000..e3a3bde2
--- /dev/null
+++ b/tests/generic/custom_null_proportion_multiple_columns.sql
@@ -0,0 +1,87 @@
+{% test custom_not_null_proportion_multiple_columns(
+    model, ignore_values=[], at_least=0.05
+) %}
+    {#
+        Generic test: flags every checked column of `model` whose share of
+        non-null rows is below `at_least`.
+        - model: relation under test.
+        - ignore_values: names of the columns to leave out of the check.
+        - at_least: minimum accepted non-null proportion (0 to 1).
+        Returns one row per offending column; zero rows means the test passes.
+    #}
+
+    {%- set columns = adapter.get_columns_in_relation(model) -%}
+    {#
+        Filter before looping so that `loop.last` refers to a column that is
+        actually emitted. Filtering inside the loop left a dangling `union all`
+        whenever the last column of the relation was ignored.
+    #}
+    {%- set checked_columns = (
+        columns | map(attribute="name") | reject("in", ignore_values) | list
+    ) -%}
+    {% set suffix = "_nulls" %}
+    {% set pivot_columns_query %}
+
+        with null_counts as(
+
+            select
+              {% for column_name in checked_columns -%}
+                SUM(CASE WHEN {{ column_name }} IS NULL THEN 1 ELSE 0 END) AS {{ column_name }}{{ suffix }},
+              {% endfor -%}
+              count(*) as total_records
+            from {{ model }}
+        ),
+
+        pivot_columns as (
+
+            {% for column_name in checked_columns -%}
+                select '{{ column_name }}' as column_name, {{ column_name }}{{ suffix }} as quantity, total_records
+                from null_counts
+                {% if not loop.last %}union all {% endif %}
+            {% else -%}
+                {# Nothing left to check: keep the CTE valid, but empty. #}
+                select cast(null as string) as column_name, 0 as quantity, total_records
+                from null_counts
+                where false
+            {% endfor %}
+        ),
+
+        faulty_columns as (
+            select
+                *
+            from pivot_columns
+            where
+                quantity / total_records > (1 - {{ at_least }})
+        )
+        select * from faulty_columns
+    {% endset %}
+    with
+        validation_errors as (
+            {%- set errors = dbt_utils.get_query_results_as_dict(
+                pivot_columns_query
+            ) -%}
+            {# Both branches below close the `validation_errors` CTE themselves. #}
+            {% if errors["column_name"] != () %}
+                {% for e in errors["column_name"] | unique %}
+                    {{
+                        log(
+                            "LOG: Coluna com preenchimento menor que "
+                            ~ at_least * 100
+                            ~ "% ---> "
+                            ~ e
+                            ~ " [FAIL]",
+                            info=True,
+                        )
+                    }}
+                    select '{{e}}' as column
+                    {% if not loop.last %}
+                        union all
+                    {% endif %}
+                {% endfor %}
+        )
+        select *
+        from validation_errors
+    {% else %}select 1 as column) select * from validation_errors where column != 1
+    {% endif %}
+
+{% endtest %}