Skip to content

Commit

Permalink
Merge pull request #704 from basedosdados/staging/br_bd_diretorios_br…
Browse files Browse the repository at this point in the history
…asil.instituicao_ensino_superior

[dados]br_bd_diretorios_brasil.instituicao_ensino_superior
  • Loading branch information
Winzen authored Jul 29, 2024
2 parents 0d6217d + d66a35b commit d291a81
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
materialized="table",
)
}}

-- Atualizado com mais 883 id_ies unicos: 07/2024
select
safe_cast(id_ies as string) id_ies,
safe_cast(nome as string) nome,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "gAOeYQ6uRoMK"
},
"source": [
"# Import And Functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FnKu0DwdKZDu"
},
"outputs": [],
"source": [
"from IPython.display import clear_output\n",
"!pip install basedosdados==2.0.0b16\n",
"clear_output()\n",
"import pandas as pd\n",
"import concurrent.futures\n",
"import basedosdados as bd\n",
"import os\n",
"\n",
"\n",
"def dicionatio_categoria(ano: int, categoria: str) -> list:\n",
"\n",
" dicionario = {\n",
" \"1\": \"Federal\",\n",
" \"2\": \"Estadual\",\n",
" \"3\": \"Municipal\",\n",
" \"7\": \"Especial\"\n",
" }\n",
"\n",
" status = \"Ativa\" if ano == 2022 else \"Inativa\"\n",
"\n",
" if categoria in (\"4\", \"5\", \"6\", \"8\", \"9\"):\n",
" return [\"Privada\", \"Privada\", status]\n",
" else:\n",
" return [\"Público\", dicionario[categoria], status]\n",
"\n",
"\n",
"def formar_linha(source_df: pd.DataFrame, id: str):\n",
"\n",
" mask = source_df.id_ies == id\n",
"\n",
" row = source_df[mask].sort_values(\n",
" by='ano', ascending=False).iloc[0].to_list()\n",
"\n",
" categoria = row.pop(3)\n",
" row += dicionatio_categoria(ano=row[0], categoria=categoria)\n",
"\n",
" return row\n",
"\n",
"def formar_df(source_df: pd.DataFrame) -> pd.DataFrame:\n",
"\n",
" columns = [\"ano\", \"id_ies\", \"nome\", \"id_municipio\",\n",
" \"sigla_uf\", \"tipo_instituicao\", \"rede\", \"situacao_funcionamento\"]\n",
"\n",
" ids_ies = source_df.id_ies.unique()\n",
"\n",
" with concurrent.futures.ThreadPoolExecutor() as executor:\n",
"\n",
" linhas = [row for row in executor.map(lambda id: formar_linha(source_df, id), ids_ies)]\n",
"\n",
" df = pd.DataFrame(linhas, columns=columns)\n",
"\n",
" return df\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Nfao_e3kRvlb"
},
"source": [
"# Gerar Tabela Atualizada"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lNPxE4d3Ts9Z"
},
"outputs": [],
"source": [
"billing_project_id = # Definir o seu \"billing_project_id\" do google\n",
"\n",
"query_educacao_superior = '''\n",
"SELECT ano, id_ies, nome, tipo_categoria_administrativa, id_municipio, sigla_uf FROM `basedosdados.br_inep_censo_educacao_superior.ies`\n",
"'''\n",
"query_ensino_superior = '''\n",
"SELECT * FROM `basedosdados.br_bd_diretorios_brasil.instituicao_ensino_superior`\n",
"'''\n",
"# Atual versão do \"br_inep_censo_educacao_superior.ies\" no BD\n",
"educacao_superior = bd.read_sql(query_educacao_superior, billing_project_id=billing_project_id)\n",
"# Atual versão do \"br_bd_diretorios_brasil.instituicao_ensino_superior\" no BD\n",
"instituicao_ensino_superior = bd.read_sql(query_ensino_superior, billing_project_id=billing_project_id)\n",
"\n",
"df = formar_df(educacao_superior)\n",
"\n",
"ensino_superior_to_merge = instituicao_ensino_superior[~instituicao_ensino_superior.id_ies.isin(df.id_ies.values)]\n",
"\n",
"df_to_merge = df.loc[:, instituicao_ensino_superior.columns]\n",
"\n",
"df_atualizada = pd.concat([ensino_superior_to_merge, df_to_merge])\n",
"\n",
"df_atualizada.nome = df_atualizada.nome.str.title()\n",
"\n",
"os.makedirs(\"output\", exist_ok=True)\n",
"\n",
"df_atualizada.to_csv(\"output/instituicao_ensino_superior.csv\", index=False)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
3 changes: 2 additions & 1 deletion models/br_inep_censo_escolar/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,10 @@ models:
- name: id_ies_ofertante
description: Código da Instituição de Ensino Superior (IES) vinculada à escola
tests:
- relationships:
- custom_relationships:
to: ref('br_bd_diretorios_brasil__instituicao_ensino_superior')
field: id_ies
ignore_values: ['21869', '21869', '21869', '18210', '19333', '21869'] # 6 ocorrências unicas de incompatibilidade
- name: local_funcionamento_predio_escolar
description: O local de funcionamento da escola é um prédio escolar
- name: tipo_local_funcionamento_predio_escolar
Expand Down

0 comments on commit d291a81

Please sign in to comment.