diff --git a/dbt_project.yml b/dbt_project.yml index b5dde347..9005a053 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -166,6 +166,9 @@ models: br_inpe_prodes: +materialized: table +schema: br_inpe_prodes + br_inpe_queimadas: + +materialized: table + +schema: br_inpe_queimadas br_jota: +materialized: table +schema: br_jota diff --git a/models/br_inpe_queimadas/br_inpe_queimadas__microdados.sql b/models/br_inpe_queimadas/br_inpe_queimadas__microdados.sql new file mode 100644 index 00000000..7e21c846 --- /dev/null +++ b/models/br_inpe_queimadas/br_inpe_queimadas__microdados.sql @@ -0,0 +1,19 @@ +{{ + config( + alias = 'microdados', + schema = "br_inpe_queimadas", + materialized = "table", + labels = {"tema": "meio-ambiente"} + ) + }} +SELECT +SAFE_CAST(ano AS INT64) ano, +SAFE_CAST(sigla_uf AS STRING) sigla_uf, +SAFE_CAST(id_municipio AS STRING) id_municipio, +SAFE_CAST(bioma AS STRING) bioma, +SAFE_CAST(id_bdq AS STRING) id_bdq, +SAFE_CAST(id_foco AS STRING) id_foco, +SAFE_CAST(data_hora AS DATETIME) data_hora, +ST_GEOGPOINT(SAFE_CAST (longitude AS FLOAT64), SAFE_CAST (latitude AS FLOAT64)) centroide, +FROM basedosdados-staging.br_inpe_queimadas_staging.microdados AS t + diff --git a/models/br_inpe_queimadas/code/[dados]_br_inpe_queimadas.ipynb b/models/br_inpe_queimadas/code/[dados]_br_inpe_queimadas.ipynb new file mode 100644 index 00000000..8dbdc0cf --- /dev/null +++ b/models/br_inpe_queimadas/code/[dados]_br_inpe_queimadas.ipynb @@ -0,0 +1,397 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "!pip install basedosdados" + ], + "metadata": { + "id": "cAuDOC62l1Yx", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "08fbd8c1-0045-40f1-fb03-5d9aed23cbb5" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting basedosdados\n", + " Downloading basedosdados-1.6.11-py3-none-any.whl (51 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.1/51.1 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting Jinja2==3.0.3 (from basedosdados)\n", + " Downloading Jinja2-3.0.3-py3-none-any.whl (133 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.6/133.6 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ckanapi==4.6 (from basedosdados)\n", + " Downloading ckanapi-4.6.tar.gz (32 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting click==8.0.3 (from basedosdados)\n", + " Downloading click-8.0.3-py3-none-any.whl (97 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.5/97.5 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting google-cloud-bigquery==2.30.1 (from basedosdados)\n", + " Downloading google_cloud_bigquery-2.30.1-py2.py3-none-any.whl (203 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m204.0/204.0 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting google-cloud-bigquery-storage==1.1.0 (from basedosdados)\n", + " Downloading google_cloud_bigquery_storage-1.1.0-py2.py3-none-any.whl (135 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.2/135.2 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting google-cloud-storage==1.42.3 (from basedosdados)\n", + " Downloading google_cloud_storage-1.42.3-py2.py3-none-any.whl (105 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.0/106.0 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting importlib-metadata<5.0.0,>=4.11.3 (from basedosdados)\n", + " Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)\n", + "Collecting loguru<0.7.0,>=0.6.0 (from basedosdados)\n", + " Downloading loguru-0.6.0-py3-none-any.whl (58 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas<2.0.0,>=1.3.5 in /usr/local/lib/python3.10/dist-packages (from basedosdados) (1.5.3)\n", + "Requirement already satisfied: pandas-gbq<0.18.0,>=0.17.4 in /usr/local/lib/python3.10/dist-packages (from basedosdados) (0.17.9)\n", + "Collecting pandavro<2.0.0,>=1.6.0 (from basedosdados)\n", + " Downloading pandavro-1.7.2-py3-none-any.whl (8.8 kB)\n", + "Collecting pyaml==20.4.0 (from basedosdados)\n", + " Downloading pyaml-20.4.0-py2.py3-none-any.whl (17 kB)\n", + "Collecting pyarrow==6.0.0 (from basedosdados)\n", + " Downloading pyarrow-6.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m25.6/25.6 MB\u001b[0m \u001b[31m56.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting ruamel.yaml==0.17.10 (from basedosdados)\n", + " Downloading ruamel.yaml-0.17.10-py3-none-any.whl (108 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.4/108.4 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting shapely<2.0.0,>=1.6.0 (from basedosdados)\n", + " Downloading Shapely-1.8.5.post1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m92.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: toml<0.11.0,>=0.10.2 in /usr/local/lib/python3.10/dist-packages (from basedosdados) (0.10.2)\n", + "Collecting tomlkit==0.7.0 (from basedosdados)\n", + " Downloading tomlkit-0.7.0-py2.py3-none-any.whl (32 kB)\n", + "Collecting tqdm==4.50.2 (from basedosdados)\n", + " 
Downloading tqdm-4.50.2-py2.py3-none-any.whl (70 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.9/70.9 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from ckanapi==4.6->basedosdados) (67.7.2)\n", + "Collecting docopt (from ckanapi==4.6->basedosdados)\n", + " Downloading docopt-0.6.2.tar.gz (25 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from ckanapi==4.6->basedosdados) (2.31.0)\n", + "Requirement already satisfied: python-slugify>=1.0 in /usr/local/lib/python3.10/dist-packages (from ckanapi==4.6->basedosdados) (8.0.1)\n", + "Requirement already satisfied: six<2.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from ckanapi==4.6->basedosdados) (1.16.0)\n", + "Requirement already satisfied: grpcio<2.0dev,>=1.38.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (1.59.2)\n", + "Requirement already satisfied: google-api-core[grpc]<3.0.0dev,>=1.29.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (2.11.1)\n", + "Requirement already satisfied: proto-plus>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (1.22.3)\n", + "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (2.3.3)\n", + "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (2.6.0)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (23.2)\n", + "Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (3.20.3)\n", + "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /usr/local/lib/python3.10/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados) (2.8.2)\n", + "Collecting google-api-core[grpc]<3.0.0dev,>=1.29.0 (from google-cloud-bigquery==2.30.1->basedosdados)\n", + " Downloading google_api_core-1.34.0-py3-none-any.whl (120 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.2/120.2 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: google-auth<3.0dev,>=1.25.0 in /usr/local/lib/python3.10/dist-packages (from google-cloud-storage==1.42.3->basedosdados) (2.17.3)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from Jinja2==3.0.3->basedosdados) (2.1.3)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from pyaml==20.4.0->basedosdados) (6.0.1)\n", + "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow==6.0.0->basedosdados) (1.23.5)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<5.0.0,>=4.11.3->basedosdados) (3.17.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.0.0,>=1.3.5->basedosdados) (2023.3.post1)\n", + 
"Requirement already satisfied: db-dtypes<2.0.0,>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados) (1.1.1)\n", + "Requirement already satisfied: pydata-google-auth in /usr/local/lib/python3.10/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados) (1.8.2)\n", + "Requirement already satisfied: google-auth-oauthlib>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados) (1.0.0)\n", + "Collecting fastavro~=1.5.1 (from pandavro<2.0.0,>=1.6.0->basedosdados)\n", + " Downloading fastavro-1.5.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m39.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados) (1.61.0)\n", + "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /usr/local/lib/python3.10/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados) (1.48.2)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados) (5.3.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados) (4.9)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados) (1.3.1)\n", + "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.10/dist-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery==2.30.1->basedosdados) (1.5.0)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.10/dist-packages (from python-slugify>=1.0->ckanapi==4.6->basedosdados) (1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->ckanapi==4.6->basedosdados) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->ckanapi==4.6->basedosdados) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->ckanapi==4.6->basedosdados) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->ckanapi==4.6->basedosdados) (2023.7.22)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados) (0.5.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados) (3.2.2)\n", + "Building wheels for collected packages: ckanapi, docopt\n", + " Building wheel for ckanapi (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for ckanapi: filename=ckanapi-4.6-py3-none-any.whl size=40677 sha256=4eb4f9df0c73fa3025f2429e6f965d2ba6e3400e1aa89e362519fdd3e5cde9d4\n", + " Stored in directory: /root/.cache/pip/wheels/64/30/93/f9c2ebf4e93dc7b6abd5e376cdee291a6541ee504ac6ec4062\n", + " Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=5ac855baef597e050318f261d84492bfcfc3da3481552194878163c029de698b\n", + " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n", + "Successfully built ckanapi docopt\n", + "Installing collected packages: docopt, tqdm, tomlkit, shapely, ruamel.yaml, pyarrow, pyaml, loguru, Jinja2, importlib-metadata, fastavro, click, ckanapi, pandavro, google-api-core, google-cloud-storage, google-cloud-bigquery-storage, google-cloud-bigquery, basedosdados\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.66.1\n", + " Uninstalling tqdm-4.66.1:\n", + " Successfully uninstalled tqdm-4.66.1\n", + " Attempting uninstall: shapely\n", + " Found existing installation: shapely 2.0.2\n", + " Uninstalling shapely-2.0.2:\n", + " Successfully uninstalled shapely-2.0.2\n", + " Attempting uninstall: pyarrow\n", + " Found existing installation: pyarrow 9.0.0\n", + " Uninstalling pyarrow-9.0.0:\n", + " Successfully uninstalled pyarrow-9.0.0\n", + " Attempting uninstall: Jinja2\n", + " Found existing installation: Jinja2 3.1.2\n", + " Uninstalling Jinja2-3.1.2:\n", + " Successfully uninstalled Jinja2-3.1.2\n", + " Attempting uninstall: importlib-metadata\n", + " Found existing installation: importlib-metadata 6.8.0\n", + " Uninstalling importlib-metadata-6.8.0:\n", + " Successfully uninstalled importlib-metadata-6.8.0\n", + " Attempting uninstall: click\n", + " Found existing installation: click 8.1.7\n", + " Uninstalling click-8.1.7:\n", + " Successfully uninstalled click-8.1.7\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 2.11.1\n", + " Uninstalling google-api-core-2.11.1:\n", + " Successfully uninstalled google-api-core-2.11.1\n", + " Attempting uninstall: google-cloud-storage\n", + " Found existing installation: google-cloud-storage 2.8.0\n", + " Uninstalling google-cloud-storage-2.8.0:\n", + " Successfully uninstalled google-cloud-storage-2.8.0\n", + " Attempting uninstall: google-cloud-bigquery-storage\n", + " Found existing installation: google-cloud-bigquery-storage 2.22.0\n", + " Uninstalling google-cloud-bigquery-storage-2.22.0:\n", + " Successfully uninstalled google-cloud-bigquery-storage-2.22.0\n", + " Attempting uninstall: google-cloud-bigquery\n", + " Found existing installation: google-cloud-bigquery 3.12.0\n", + " Uninstalling google-cloud-bigquery-3.12.0:\n", + " Successfully uninstalled google-cloud-bigquery-3.12.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "lida 0.0.10 requires fastapi, which is not installed.\n", + "lida 0.0.10 requires kaleido, which is not installed.\n", + "lida 0.0.10 requires python-multipart, which is not installed.\n", + "lida 0.0.10 requires uvicorn, which is not installed.\n", + "bigframes 0.12.0 requires google-cloud-bigquery[bqstorage,pandas]>=3.10.0, but you have google-cloud-bigquery 2.30.1 which is incompatible.\n", + "bigframes 0.12.0 requires google-cloud-storage>=2.0.0, but you have google-cloud-storage 1.42.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Jinja2-3.0.3 basedosdados-1.6.11 ckanapi-4.6 click-8.0.3 docopt-0.6.2 fastavro-1.5.4 google-api-core-1.34.0 google-cloud-bigquery-2.30.1 google-cloud-bigquery-storage-1.1.0 google-cloud-storage-1.42.3 importlib-metadata-4.13.0 loguru-0.6.0 pandavro-1.7.2 pyaml-20.4.0 pyarrow-6.0.0 ruamel.yaml-0.17.10 shapely-1.8.5.post1 tomlkit-0.7.0 tqdm-4.50.2\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-IPLnj5ocyNm", + "outputId": "01b85e04-3feb-4469-f60d-61f4c944f06d" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import zipfile\n", + "from zipfile import ZipFile\n", + "import basedosdados as bd" + ], + "metadata": { + "id": "ttcqngQqdXWh" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Setup inicial\n", + "_necessário rodar apenas uma vez_\n", + "\n", + "* Download dos arquivos \n", + "* Criação das pastas de partição (ano) em output\n", + "* Como os municípios são listados por nome, substituímos pelo id_municipio (ID Município - IBGE 7 Dígitos). Tal check cria a lista de todos os municípios que não dão match com a base de compatibilização extraída do diretório de município. 
Baseado nessa lista, faz-se o replace para um match 1:1" + ], + "metadata": { + "id": "1fnt_SnhZNq0" + } + }, + { + "cell_type": "code", + "source": [ + "# download dos dados originais\n", + "for ano in [*range(2003, 2023)]:\n", + " !wget --no-check-certificate -P /content/drive/MyDrive/basedosdados/br_inpe_queimadas/input https://dataserver-coids.inpe.br/queimadas/queimadas/focos/csv/anual/Brasil_sat_ref/focos_br_ref_{ano}.zip" + ], + "metadata": { + "id": "7KCkQkVHJV8n" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# cria pastas particionadas por ano\n", + "for ano in [*range(2003, 2023)]:\n", + " directory = '/content/drive/MyDrive/basedosdados/br_inpe_queimadas/output/ano={}'.format(ano)\n", + " if not os.path.exists(directory):\n", + " os.makedirs(directory)" + ], + "metadata": { + "id": "Kl0G_5N7JZO4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# compatibilização da base entre nomes, siglas e ids\n", + "query = '''\n", + "SELECT\n", + " sigla_uf,\n", + " UPPER(nome_uf) AS estado,\n", + " id_municipio,\n", + " UPPER(nome) AS municipio\n", + "FROM basedosdados.br_bd_diretorios_brasil.municipio\n", + "'''\n", + "\n", + "comp = bd.read_sql(query, billing_project_id='input-dados')" + ], + "metadata": { + "id": "GZ7gqHQXJpi4" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# check de match para municipios\n", + "unmatch_list = []\n", + "\n", + "for ano in [*range(2003, 2023)]:\n", + " file = '/content/drive/MyDrive/basedosdados/br_inpe_queimadas/input/focos_br_ref_{}.zip'.format(ano)\n", + " # descompacta o arquivo csv\n", + " with ZipFile(file) as z:\n", + " with z.open('focos_br_ref_{}.csv'.format(ano)) as f:\n", + " df = pd.read_csv(f)\n", + " # merge com diretorio\n", + " df = pd.merge(df, comp, how='left', on=['estado', 'municipio'], indicator=True)\n", + " # cria lista de municipios com typos\n", + " unmatch = df.query('_merge == \"left_only\"')['municipio'].drop_duplicates().to_list()\n", + " # join entre anos\n", + " unmatch_list = unmatch_list + unmatch\n", + "\n", + "# função que remove duplicados na lista\n", + "def remove_repetidos(lista):\n", + " l = []\n", + " for i in lista:\n", + " if i not in l:\n", + " l.append(i)\n", + " l.sort()\n", + " return l\n", + "\n", + "unmatch_list_unique = remove_repetidos(unmatch_list) #lista conta com 26 municipios e typos para dados até 2022" + ], + "metadata": { + "id": "9CWB-i0UCDsI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Tratamento" + ], + "metadata": { + "id": "gLvxFJ9UZ629" + } + }, + { + "cell_type": "code", + "source": [ + "for ano in [*range(2003, 2023)]:\n", + " #if ano == 2003:\n", + " file = '/content/drive/MyDrive/basedosdados/br_inpe_queimadas/input/focos_br_ref_{}.zip'.format(ano)\n", + " # descompacta o arquivo csv\n", + " with ZipFile(file) as z:\n", + " with z.open('focos_br_ref_{}.csv'.format(ano)) as f:\n", + " df = pd.read_csv(f)\n", + " df['municipio'].replace({'ARAÇÁS':'ARAÇAS',\n", + " 'ATÍLIO VIVACQUA':'ATILIO VIVACQUA',\n", + " 'AUGUSTO SEVERO':'CAMPO GRANDE',\n", + " 'BIRITIBA MIRIM':'BIRITIBA-MIRIM',\n", + " 'FLORÍNEA':'FLORÍNIA',\n", + " 'IGUARACY':'IGUARACI',\n", + " 'ITAOCA':'ITAÓCA',\n", + " 'ITAPAJÉ':'ITAPAGÉ',\n", + " 'IUIU':'IUIÚ',\n", + " 'JANUÁRIO CICCO':'BOA SAÚDE',\n", + " 'LAURO MÜLLER':'LAURO MULLER',\n", + " 'MUQUÉM DO SÃO FRANCISCO':'MUQUÉM DE SÃO FRANCISCO',\n", + " \"OLHO D'ÁGUA DO 
BORGES\":\"OLHO-D'ÁGUA DO BORGES\",\n", + " 'PASSA VINTE':'PASSA-VINTE',\n", + " \"PINGO D'ÁGUA\":\"PINGO-D'ÁGUA\",\n", + " 'POXORÉU':'POXORÉO',\n", + " 'RESTINGA SÊCA':'RESTINGA SECA',\n", + " 'SANTA IZABEL DO PARÁ':'SANTA ISABEL DO PARÁ',\n", + " 'SÃO CRISTÓVÃO DO SUL':'SÃO CRISTOVÃO DO SUL',\n", + " 'SÃO LUIZ DO NORTE':'SÃO LUÍZ DO NORTE',\n", + " 'SÃO LUIZ DO PARAITINGA':'SÃO LUÍS DO PARAITINGA',\n", + " 'SÃO VICENTE FÉRRER':'SÃO VICENTE FERRER',\n", + " 'VESPASIANO CORRÊA':'VESPASIANO CORREA',\n", + " 'WESTFÁLIA':'WESTFALIA'}, inplace=True)\n", + " df['municipio'] = np.where((df['estado'] == 'BAHIA') & (df['municipio'] == 'SANTA TEREZINHA'), 'SANTA TERESINHA', df['municipio'])\n", + " df['municipio'] = np.where((df['estado'] == 'PARAÍBA') & (df['municipio'] == 'QUIXABA'), 'QUIXABÁ', df['municipio'])\n", + " df = pd.merge(df, comp, how='left', on=['estado', 'municipio'], indicator=True)\n", + " df.rename(columns={'lat':'latitude', 'lon':'longitude', 'data_pas':'data_hora', 'foco_id':'id_foco'}, inplace=True)\n", + " df = df[['sigla_uf', 'id_municipio', 'bioma', 'id_bdq', 'id_foco', 'data_hora', 'latitude', 'longitude']]\n", + " df.to_csv('/content/drive/MyDrive/basedosdados/br_inpe_queimadas/output/ano={}/microdados.csv'.format(ano), index=False, na_rep='')" + ], + "metadata": { + "id": "BSL7hoyDgZCs" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/models/br_inpe_queimadas/schema.yaml b/models/br_inpe_queimadas/schema.yaml new file mode 100644 index 00000000..74e3ea49 --- /dev/null +++ b/models/br_inpe_queimadas/schema.yaml @@ -0,0 +1,33 @@ +version: 2 + +models: + - name: br_inpe_queimadas__microdados + description: Microdados do banco de dados do INPE sobre queimadas + columns: + - name: ano + description: Ano + - name: sigla_uf + description: Sigla da Unidade da Federação + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: sigla_uf + - name: id_municipio + description: ID Município - IBGE 7 Dígitos + tests: + - relationships: + to: ref('br_bd_diretorios_brasil__municipio') + field: id_municipio + - name: bioma + description: Bioma da área de registro da queimada + - name: id_bdq + description: ID BDQueimadas - Banco de Dados de Queimadas do INPE + - name: id_foco + description: ID Foco de incêndio + tests: + - unique + - not_null + - name: data_hora + description: Data e hora de registro do foco de incêndio + - name: centroide + description: Latitude e longitude do foco de queimada