def gerar_individual_table(file_name: str) -> pd.DataFrame:
    """Build the cleaned per-municipality table for one downloaded file.

    The biome and the year are taken from the file name, which must have the
    form ``<bioma>_<ano>.<ext>`` (for example ``Cerrado_2010.csv``).  Only the
    last underscore separates the year, so biome names may themselves contain
    underscores, and the extension may have any length.

    Column layout relied upon (inherited from the original positional
    indexing -- NOTE(review): confirm against the INPE export format):

    * ``CodIbge`` and ``AreaKm2`` are looked up by name;
    * every column from position 7 up to, but excluding, the last three is
      summed row-wise into ``desmatamento``;
    * the last two columns are taken as ``nao_floresta`` and ``hidrografia``.

    ``floresta`` is recomputed as
    ``area - (desmatamento + nao_floresta + hidrografia)`` rather than read
    from the file.

    Args:
        file_name: Path to the CSV file (read as ISO-8859-1).

    Returns:
        A DataFrame with the columns ``ano``, ``id_municipio``, ``bioma``,
        ``area``, ``desmatamento``, ``floresta``, ``nao_floresta`` and
        ``hidrografia``, in that order.

    Raises:
        ValueError: If the file name is not ``<bioma>_<ano>.<ext>`` or the
            year part is not an integer.
        KeyError: If ``CodIbge`` or ``AreaKm2`` is missing from the file.
    """
    output_columns = [
        "ano",
        "id_municipio",
        "bioma",
        "area",
        "desmatamento",
        "floresta",
        "nao_floresta",
        "hidrografia",
    ]

    # Parse the name before touching the file so a misnamed input fails fast
    # with a readable message instead of an unpacking error.
    stem = os.path.splitext(os.path.basename(file_name))[0]
    bioma, separator, year = stem.rpartition("_")
    if not separator or not bioma:
        raise ValueError(
            f"Expected a file named '<bioma>_<ano>.<ext>', got {file_name!r}"
        )
    ano = int(year)

    raw = pd.read_csv(file_name, encoding="ISO-8859-1")
    columns = raw.columns

    # Building a fresh frame (instead of re-slicing ``raw`` several times)
    # avoids assigning into a slice and keeps each output column's origin
    # visible in one place.
    df = pd.DataFrame(
        {
            "ano": ano,
            "id_municipio": raw["CodIbge"],
            "bioma": bioma,
            "area": raw["AreaKm2"],
            "desmatamento": raw[columns[7:-3]].sum(axis=1).round(1),
            "nao_floresta": raw[columns[-2]],
            "hidrografia": raw[columns[-1]],
        }
    )

    # ``sum(axis=1)`` is kept (rather than ``+``) so missing values are
    # skipped exactly as before instead of propagating NaN.
    non_forest_total = df[["desmatamento", "nao_floresta", "hidrografia"]].sum(axis=1)
    df["floresta"] = (df["area"] - non_forest_total).round(1)

    return df[output_columns]


def get_clean_data(
    input_dir: str = "../input",
    output_path: str = "../output/br_inpe_prodes_desmatamento.csv",
) -> None:
    """Clean every downloaded file in *input_dir* and write one combined CSV.

    Files are processed in sorted name order so the output is reproducible.
    Hidden entries (names starting with ``.``) and sub-directories are
    skipped.  The directory of *output_path* is created when missing.

    Args:
        input_dir: Directory holding the downloaded ``<bioma>_<ano>`` files.
            The default is relative to the current working directory.
        output_path: Destination CSV path, written without the index column.

    Raises:
        ValueError: If *input_dir* contains no input files.
    """
    # List and read from the same directory; the previous listing path was
    # derived by replacing every "code" substring of the working directory,
    # which points elsewhere for paths such as ".../vscode/.../code".
    names = sorted(
        name
        for name in os.listdir(input_dir)
        if not name.startswith(".")
        and os.path.isfile(os.path.join(input_dir, name))
    )
    if not names:
        raise ValueError(f"No input files found in {input_dir!r}")

    frames = [
        gerar_individual_table(os.path.join(input_dir, name)) for name in names
    ]
    df = pd.concat(frames)

    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    df.to_csv(output_path, index=False)


if __name__ == "__main__":
    get_clean_data()
# Biomes offered by the INPE endpoint; single source of truth for both the
# URL table and the bulk download.
BIOMAS = (
    "Amazônia",
    "Caatinga",
    "Cerrado",
    "Mata Atlântica",
    "Pampa",
    "Pantanal",
)


class DownloaderBrInpeProdes:
    """Download the per-municipality table of one year from the INPE site.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, year: int):
        """Prepare the download URLs of every biome for *year*."""
        # Function-scope import: keeps this class self-contained without
        # touching the module's import block.
        from urllib.parse import quote

        # Year requested from the endpoint; also used in the saved file name.
        self.year = year
        url_base = f"http://www.dpi.inpe.br/prodesdigital/tabelatxt.php?ano={self.year}"
        # Maps biome name -> download URL.  ``quote`` percent-encodes the
        # UTF-8 bytes of the name (e.g. "Amaz%C3%B4nia", "Mata%20Atl%C3%A2ntica").
        self.biomas_urls = {
            bioma: (
                f"{url_base}&estado=&bioma={quote(bioma)}"
                "&ordem=municipio&type=tabela&output=txt&"
            )
            for bioma in BIOMAS
        }

    def url(self, bioma: str) -> str:
        """Return the download URL for *bioma*.

        Raises:
            KeyError: If *bioma* is not one of the known biome names.
        """
        return self.biomas_urls[bioma]

    def download_data(
        self,
        bioma: str,
        format="csv",
        output_dir: str = "../input",
        timeout: float = 60,
    ) -> None:
        """Fetch the table of *bioma* and save it as ``<bioma>_<year>.<format>``.

        The response body is written byte-for-byte, so the file keeps whatever
        encoding the server used instead of being decoded as UTF-8 and
        re-encoded with the platform default.

        Args:
            bioma: Biome name, one of the keys of ``biomas_urls``.
            format: File extension of the saved file (without the dot).
            output_dir: Directory to save into; created when missing.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang a worker thread forever.

        Raises:
            KeyError: If *bioma* is unknown.
            requests.RequestException: On connection errors, timeouts or a
                non-success HTTP status.
        """
        import os

        url = self.url(bioma)
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # exist_ok makes this safe when several threads create it at once.
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, f"{bioma}_{self.year}.{format}")
        with open(path, "wb") as f:
            f.write(response.content)

        print(f"Successfully downloaded for bioma {bioma} year {self.year}")


def start_downloader(info: tuple) -> None:
    """Download one ``(year, bioma)`` job; thread-pool entry point."""
    year, bioma = info[0], info[1]
    DownloaderBrInpeProdes(year).download_data(bioma)


def download_all_biomas_data(
    years=range(2000, 2023),
    biomas_names=BIOMAS,
    *,
    downloader=start_downloader,
    max_workers=None,
) -> None:
    """Download every ``(year, bioma)`` combination concurrently.

    Every job is attempted even when some fail.  Failures are collected and
    reported together once all jobs have finished; previously the results of
    ``executor.map`` were never consumed, so worker exceptions vanished
    without a trace.

    Args:
        years: Iterable of years to download (default 2000 through 2022).
        biomas_names: Iterable of biome names (default: all known biomes).
        downloader: Callable invoked with one ``(year, bioma)`` tuple per job.
        max_workers: Thread-pool size; ``None`` uses the executor's default.

    Raises:
        RuntimeError: If at least one job raised; the message lists each
            failed job with its exception.
    """
    jobs = list(product(years, biomas_names))
    failures = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        pending = {executor.submit(downloader, job): job for job in jobs}
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                failures.append((pending[future], error))

    if failures:
        # Sort by job so the report is stable regardless of completion order.
        failures.sort(key=lambda item: item[0])
        details = "; ".join(
            f"{bioma} {year}: {error!r}" for (year, bioma), error in failures
        )
        raise RuntimeError(
            f"{len(failures)} of {len(jobs)} downloads failed: {details}"
        )


if __name__ == "__main__":
    download_all_biomas_data()