Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dados] br_inpe_prodes #303

Merged
merged 5 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,10 @@ models:
+schema: br_inep_saeb
br_inmet_bdmep:
+materialized: table
+schema: br_inmet_bdmep
+schema: br_inmet_bdmep
br_inpe_prodes:
+materialized: table
+schema: br_inpe_prodes
br_jota:
+materialized: table
+schema: br_jota
Expand Down
16 changes: 16 additions & 0 deletions models/br_inpe_prodes/br_inpe_prodes_desmatamento_municipio.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{
    config(
        schema='br_inpe_prodes',
        alias='desmatamento_municipio'
    )
}}
-- Typed projection of the staging table: every column is cast explicitly.
-- SAFE_CAST yields NULL instead of raising when a value cannot be converted.
-- NOTE(review): `area` is cast to INT64 while the other measures are FLOAT64;
-- confirm the staging values for `area` are whole numbers, otherwise they
-- would come out as NULL.
select
    safe_cast(ano as int64) as ano,
    safe_cast(id_municipio as string) as id_municipio,
    safe_cast(bioma as string) as bioma,
    safe_cast(area as int64) as area,
    safe_cast(desmatamento as float64) as desmatamento,
    safe_cast(floresta as float64) as floresta,
    safe_cast(nao_floresta as float64) as nao_floresta,
    safe_cast(hidrografia as float64) as hidrografia
from basedosdados-staging.br_inpe_prodes_staging.desmatamento_municipio as t
35 changes: 35 additions & 0 deletions models/br_inpe_prodes/code/clean_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import pandas as pd


def gerar_individual_table(file_name: str) -> pd.DataFrame:
    """Build the cleaned table for one ``<bioma>_<year>.<ext>`` input file.

    The file is read as ISO-8859-1 CSV. The bioma and the year are taken from
    the file name, the total deforested area is the row-wise sum of a
    positional slice of columns, and the forest area is recomputed from the
    other measures.

    Args:
        file_name: Path to the input file. Its base name must end in
            ``_<year>`` before the extension; everything before that last
            underscore is used as the bioma.

    Returns:
        A DataFrame with the columns ``ano``, ``id_municipio``, ``bioma``,
        ``area``, ``desmatamento``, ``floresta``, ``nao_floresta`` and
        ``hidrografia``.

    Raises:
        ValueError: If the base name has no underscore or the part after the
            last underscore is not an integer.
        KeyError: If the columns at positions 4 and 6 are not named
            ``AreaKm2`` and ``CodIbge``.
    """
    df = pd.read_csv(file_name, encoding="ISO-8859-1")

    # Split on the LAST underscore only, so a bioma containing "_" still
    # parses, and rely on os.path so neither the path separator nor the
    # extension length is hard-coded.
    stem, _extension = os.path.splitext(os.path.basename(file_name))
    bioma, year = stem.rsplit("_", 1)

    df["bioma"] = bioma
    df["ano"] = int(year)

    # At this point the last five columns are the three trailing source
    # columns plus the "bioma" and "ano" just added; everything from position
    # 7 up to them is summed into the total (presumably the per-period
    # deforestation columns - TODO confirm against the source layout).
    df["desmatamento"] = (df[df.columns[7:-5]].sum(axis=1)).round(1)

    # Keep the columns at positions 4 and 6 plus the last six
    # (three trailing source columns, bioma, ano, desmatamento).
    df = df[df.columns[[4, 6]].to_list() + df.columns[-6:].to_list()]

    columns_order = [
        "ano", "CodIbge", "bioma", "AreaKm2", "desmatamento",
    ] + df.columns[2:5].to_list()
    df = df[columns_order]

    df.columns = [
        "ano", "id_municipio", "bioma", "area", "desmatamento",
        "floresta", "nao_floresta", "hidrografia",
    ]

    # The forest value read from the file is overwritten: it is recomputed as
    # whatever remains of the area after the other three measures.
    remainder = df[["desmatamento", "nao_floresta", "hidrografia"]].sum(axis=1)
    df["floresta"] = (df["area"].values - remainder).round(1)

    return df


def get_clean_data() -> None:
    """Clean every file in ``../input`` and write one combined CSV.

    Each input file is processed by ``gerar_individual_table``; the resulting
    tables are concatenated and saved, without the index, to
    ``../output/br_inpe_prodes_desmatamento.csv``. Paths are relative to the
    current working directory, so the script is meant to be run from the
    ``code`` directory.

    Raises:
        FileNotFoundError: If ``../input`` does not exist.
        ValueError: If there is no input file to concatenate.
    """
    input_dir = "../input"

    # List the same directory the files are later read from. Deriving it with
    # os.getcwd().replace("code", "input") would rewrite every "code"
    # substring of the absolute path, not only the last path component.
    # Dotfiles (e.g. .gitkeep) are not data, and sorting makes the row order
    # of the output independent of the file-system listing order.
    names = sorted(
        name for name in os.listdir(input_dir) if not name.startswith(".")
    )
    files = [f"{input_dir}/{name}" for name in names]

    df = pd.concat(map(gerar_individual_table, files))
    df.to_csv("../output/br_inpe_prodes_desmatamento.csv", index=False)


if __name__ == "__main__":
    get_clean_data()
47 changes: 47 additions & 0 deletions models/br_inpe_prodes/code/download_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import requests
import concurrent.futures
from itertools import product


class DownloaderBrInpeProdes:
    """Download the per-municipality tables of one year from the INPE site.

    Attributes are set in ``__init__``:
        year: The year requested in every URL.
        biomas_urls: Mapping from bioma name to its download URL for ``year``.
    """

    # Seconds to wait for the server before giving up, so one stalled
    # request cannot block a worker forever.
    REQUEST_TIMEOUT = 60

    def __init__(self, year: int):
        """Build the download URL of every supported bioma for ``year``."""
        self.year = year
        url_base = f"http://www.dpi.inpe.br/prodesdigital/tabelatxt.php?ano={self.year}"
        # Bioma name -> the percent-encoded form used in the query string.
        encoded_names = {
            "Amazônia": "Amaz%C3%B4nia",
            "Caatinga": "Caatinga",
            "Cerrado": "Cerrado",
            "Mata Atlântica": "Mata%20Atl%C3%A2ntica",
            "Pampa": "Pampa",
            "Pantanal": "Pantanal",
        }
        self.biomas_urls = {
            bioma: f"{url_base}&estado=&bioma={encoded}&ordem=municipio&type=tabela&output=txt&"
            for bioma, encoded in encoded_names.items()
        }

    def url(self, bioma: str) -> str:
        """Return the download URL for ``bioma``.

        Raises:
            KeyError: If ``bioma`` is not one of the supported names.
        """
        return self.biomas_urls[bioma]

    def download_data(self, bioma: str, format="csv") -> None:
        """Download the table of ``bioma`` to ``../input/<bioma>_<year>.<format>``.

        Args:
            bioma: One of the keys of ``biomas_urls``.
            format: File extension used for the saved file.

        Raises:
            KeyError: If ``bioma`` is not supported.
            requests.RequestException: On connection errors, timeouts or a
                non-success HTTP status.
        """
        response = requests.get(self.url(bioma), timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()

        # Save the payload exactly as received. Decoding it as UTF-8 and
        # re-encoding with the platform default could fail or alter the bytes,
        # and the cleaning step picks its own encoding when reading the file.
        with open(f"../input/{bioma}_{self.year}.{format}", "wb") as f:
            f.write(response.content)

        print(f"Successfully downloaded for bioma {bioma} year {self.year}")


def start_downloader(info: tuple) -> None:
    """Download one table described by ``info``.

    Args:
        info: A sequence whose first item is the year and whose second item
            is the bioma name.
    """
    year, bioma = info[0], info[1]
    downloader = DownloaderBrInpeProdes(year)
    downloader.download_data(bioma)


def download_all_biomas_data() -> None:
    """Download every (year, bioma) table for 2000-2022 using a thread pool.

    Each combination is handed to ``start_downloader``. A failed download
    does not stop the others and is not re-raised; it is printed together
    with the error, and a summary line is printed when anything failed.
    """
    years = range(2000, 2023)
    biomas_names = ['Amazônia', 'Caatinga', 'Cerrado', 'Mata Atlântica', 'Pampa', 'Pantanal']
    jobs = list(product(years, biomas_names))

    failures = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # executor.map only raises a worker's exception when its result is
        # consumed; since the results were never read, failures went
        # unnoticed. Tracking the futures makes every error visible.
        futures = {executor.submit(start_downloader, job): job for job in jobs}
        for future in concurrent.futures.as_completed(futures):
            error = future.exception()
            if error is None:
                continue
            year, bioma = futures[future]
            failures.append((year, bioma))
            print(f"Failed to download bioma {bioma} year {year}: {error!r}")

    if failures:
        print(f"{len(failures)} of {len(jobs)} downloads failed")


if __name__ == "__main__":
    download_all_biomas_data()
22 changes: 22 additions & 0 deletions models/br_inpe_prodes/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
version: 2

models:
- name: desmatamento_municipio
description: Apresenta o desmatamento dos municípios e outras variáveis, por bioma, a partir do ano 2000
columns:
- name: ano
description: Ano
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
- name: bioma
description: Bioma
- name: area
description: Área
- name: desmatamento
description: Área Desmatada Total
- name: floresta
description: Área de Floresta
- name: nao_floresta
description: Área de Não-Floresta
- name: hidrografia
description: Área de Hidrografia
Loading