Skip to content

Commit

Permalink
Merge pull request #577 from basedosdados/br_bd_diretorios_mundo
Browse files Browse the repository at this point in the history
[dados]br_bd_diretorios_mundo.pais
  • Loading branch information
Winzen authored Jul 23, 2024
2 parents b302283 + c77fd65 commit df6bd78
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ select
safe_cast(sigla_pais_iso3 as string) sigla_pais_iso3,
safe_cast(sigla_pais_iso2 as string) sigla_pais_iso2,
safe_cast(sigla_pais_pnud as string) sigla_pais_pnud,
safe_cast(sigla_pais_coi as string) sigla_pais_coi,
safe_cast(sigla_pais_fifa as string) sigla_pais_fifa,
safe_cast(nome as string) nome,
safe_cast(nome_ingles as string) nome_ingles,
safe_cast(nome_oficial_ingles as string) nome_oficial_ingles,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pandas as pd
import os

# Previous country file, which does not yet carry the COI (IOC) and FIFA codes
ORIGINAL_URL = "https://storage.googleapis.com/basedosdados/staging/br_bd_diretorios_mundo/pais/pais.csv"

# Wikipedia page comparing COI (IOC), FIFA and ISO 3166 country codes
WIKI_URL = 'https://pt.wikipedia.org/wiki/Compara%C3%A7%C3%A3o_entre_c%C3%B3digos_de_pa%C3%ADses_COI,_FIFA,_e_ISO_3166'

# Wikipedia column name -> column name used in the country directory
CODE_COLUMNS = {
    "COI": "sigla_pais_coi",
    "FIFA": "sigla_pais_fifa",
    "ISO-3": "sigla_pais_iso3",
}


def build_code_table(tables):
    """Return one COI/FIFA lookup row per ISO-3 code.

    Args:
        tables: iterable of DataFrames, each holding the ``COI``, ``FIFA``
            and ``ISO-3`` columns (as parsed from the Wikipedia tables).

    Returns:
        DataFrame with columns ``sigla_pais_coi``, ``sigla_pais_fifa`` and
        ``sigla_pais_iso3``, without missing or repeated ISO-3 codes.
    """
    codes = pd.concat(tables)[list(CODE_COLUMNS)].rename(columns=CODE_COLUMNS)
    # Rows without an ISO-3 code cannot be joined. Keeping them would be
    # harmful: pandas matches NaN keys against each other, so they would be
    # attached to every original row whose ISO-3 code is also missing.
    codes = codes.dropna(subset=["sigla_pais_iso3"])
    # A code repeated across tables would duplicate the country in the
    # left join; the first occurrence wins.
    return codes.drop_duplicates(subset="sigla_pais_iso3", keep="first")


def attach_codes(original, codes):
    """Left-join the COI/FIFA codes onto the country table by ISO-3 code.

    Args:
        original: country DataFrame with a ``sigla_pais_iso3`` column.
        codes: lookup table produced by ``build_code_table``.

    Returns:
        ``original`` with ``sigla_pais_coi`` and ``sigla_pais_fifa`` appended;
        the number of rows is unchanged.

    Raises:
        pandas.errors.MergeError: if ``codes`` has repeated ISO-3 codes.
    """
    return original.merge(
        codes,
        on="sigla_pais_iso3",
        how="left",
        validate="many_to_one",  # guarantees the join never multiplies rows
    )


def main():
    """Download both sources and write the enriched table to output/pais.csv."""
    original = pd.read_csv(ORIGINAL_URL)
    # Only tables carrying the "wikitable" CSS class; first row is the header.
    tables = pd.read_html(WIKI_URL, attrs={'class': 'wikitable'}, header=0)

    merged = attach_codes(original, build_code_table(tables))

    os.makedirs("output", exist_ok=True)
    merged.to_csv("output/pais.csv", index=False)


if __name__ == "__main__":
    main()
67 changes: 67 additions & 0 deletions models/br_bd_diretorios_mundo/code/download_sh4_ncm.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"from io import BytesIO\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download the NCM and SH4 lookup tables and re-save them as UTF-8,\n",
"# comma-separated CSV files in the current working directory.\n",
"urls = {\n",
"    'ncm' : 'https://balanca.economia.gov.br/balanca/bd/tabelas/NCM.csv',\n",
"    'sh4': 'https://balanca.economia.gov.br/balanca/bd/tabelas/NCM_SH.csv'\n",
"}\n",
"\n",
"for k in urls:\n",
"    print(f'Baixando tabela {k}')\n",
"\n",
"    # NOTE(review): verify=False disables TLS certificate verification;\n",
"    # confirm it is still required for this host before relying on the data.\n",
"    # The timeout prevents the cell from hanging forever on a stalled server.\n",
"    response = requests.get(urls[k], verify=False, timeout=60)\n",
"    if response.status_code == 200:\n",
"        data = BytesIO(response.content)\n",
"        # Source files are latin-1 and ';'-separated; read everything as str.\n",
"        df = pd.read_csv(data, encoding='latin-1', sep=';', dtype = str)\n",
"        df.to_csv(\n",
"            f'{k}.csv',\n",
"            index=False,\n",
"            encoding = 'utf-8',\n",
"            sep=',', \n",
"        )\n",
"    else:\n",
"        print(f'Erro! O status da requisição foi {response.status_code}')\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "bd_pipelines",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 4 additions & 0 deletions models/br_bd_diretorios_mundo/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ models:
description: Sigla do País - ISO2
- name: sigla_pais_pnud
description: Sigla do País - PNUD
- name: sigla_pais_coi
description: Sigla do País - COI
- name: sigla_pais_fifa
description: Sigla do País - FIFA
- name: nome
description: Nome do país em português
- name: nome_ingles
Expand Down

0 comments on commit df6bd78

Please sign in to comment.