From 246943c7803388ef2e596c06ab2af5fbe43e92f6 Mon Sep 17 00:00:00 2001 From: Diego Oliveira Date: Thu, 2 May 2024 18:55:28 -0300 Subject: [PATCH] [Infra] Version 2.0.0b16 (#1678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [infra] Version 1.7.0 python-package * [infra] fix update_columns test * [infra] remove unused import * [infra] add to_partition utility function * [infra] add test for to_partitions * [infra] pump package version 1.6.9-b2 * [infra] add break_file feature * Revert "[infra] pump package version 1.6.9-b2" This reverts commit 0cba449c23e348468531d74f1edb35c91325d547. * feat: add `connection_id` to external data configuration * fix(Datatype): add connection id for external configuration * feat: add automatic management of BQ connection * chore: fix linting issues * feat: add test folder to gitignore * feat: release beta version * feat(Connection): add `service_account` property * feat(Base): add IAM stuff * chore: fix linting issues * feat: automatic granting roles to BigLake service account * feat: better error handling, set biglake permissions is now optional * feat: release beta version * chore: modify log message * chore: make all partitions string * chore: merge master * add __version__ atribute (#1488) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * add option to change copied table name (#1489) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * fix: pylint * changing python_path fixture * adding shapely as dependency to downgrade if already installed, like Colab * pylinting files * bump version 1.6.10-beta.1 * bump version 1.6.10 * updating version * return update_columns to Table class * authentication methods in base class * method to return dataset id from slug using graphql * method to return table id from slug of dataset and table using graphql * using variables in graphql query * change default & log downloaded path * 
authentication with graphql * change version in pyproject.toml * chore: refactor connection imports and make working dir default for storage download * chore: make staging the default mode for storage download * chore: make staging the default mode for storage download * fix: pylint * chore: release new beta version * small corrections in 1.6.11 * methods to retrieve metadata from graphql api * adding data to api_data_dict * adding exists_in_api method * changing is_updated method * method to get a request in graphql * logging errors with loguru, instead of print * writing yaml files before updating the database * helper to convert case from snake_case to camelCase and vice versa * start refactoring the query to use alias * refactoring: clean edges and nodes from graphql response * moving graphql queries for separated files and others * improving unit tests and graphql for api_metadata * changing api_response for compatibility with current yamls * Hotfix storage init args (#1576) * [dados]br_ibge_estadic.indicadores (#1535) * dados * dados * title * observation_level * escolaridade --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_ipea_avs (#1530) * up br_ipea_avs * Ajeitando os comentários da equipe de dados. * Alterando a temporal_coverage * Ajustando o PR.3 * Create code * Delete code * Create br_avs_ipea * Add files via upload * Delete br_avs_ipea * Subindo novamente toda a base, devido as alterações. 
* Ajustando o PR * Delete br_ipea_avs.ipynb * update * update * update * update * update --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * Update table_config.yaml (#1553) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_me_clima_organizacional (#1548) * dados * partner_organization --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_me_exportadoras_importadoras (#1521) * Sobe br_me_exportadoras_importadoras * Faz correções no script * Corrige erros no table_config apontados na correção do PR * Corrige erros no table_config apontados na correção do PR v2 * Corrige erros no dicionário * Corrige tipo do CEP * Corrige o tipo do dado do CEP no publish.sql * Delete dataset_config yamls --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> * [dados] world_fao_production (#1536) * Abrir PR world_fao_production * Corrige erros apontados na correção do PR * Corrige erros apontados na correção do PR v2 * Altera nível de observação do table_cofing da tabela item * Corrige erros apontados na correção do PR * Corrige a partição e altera a nome da variável ano para year * remove palavra repetida da descrição --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> * [dados] world_wb_wwbi.country_finance (#1538) * dados * update * data * add observacoes --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados-atualizacao] update pib municipio (#1559) * update pib municipio * updates * updates * Update table_description.txt --------- Co-authored-by: Gabrielle Carvalho <77730866+gabrielle-carv@users.noreply.github.com> * [dados] br_ibge_estadic (#1560) * update * update metadata_modified * 
update metadata_modified * update metadata_modified --------- Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> * [dados] br_ibge_munic (#1534) * dados * update table_config * update * code * code novo * sigla_uf --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_me_siconfi.uf (#1546) * upload br_me_siconfi_uf * update review gabs * Update table_config.yaml * Update table_config.yaml * Update table_config.yaml --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> * [dados] br_ipea_avs (#1564) * up br_ipea_avs * up br_ibge_estadic * Ajustando os comentários da equipe de dados. * Ajustando cobertura temporal. * Delete README.md * Delete dataset_config.yaml * Delete publish.sql * Delete schema-prod.json * Delete schema-staging.json * Delete table_config.yaml * Delete table_description.txt * update * update * update * update * update * update * update * Delete br_ibge_estadic_educação.ipynb apagando código estadic * Delete publish.sql apagando publish.sql * Delete schema-prod.json apagando schema-prod * Delete schema-staging.json apagando schema-staging * Delete table_config.yaml apagando table_config * Delete table_description.txt apagando table_description --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_bcb_estban (#1561) * Abre PR do conjunto br_bcb_estban * Corrige erros apontados na correção do PR * Delete dataset_config.yaml * Corrige erros apontados na correção do PR v2.0 --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> * update dicionario br_me_rais (#1567) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados] br_ibge_estadic (#1531) * up br_ipea_avs * up 
br_ibge_estadic * Ajustando os comentários da equipe de dados. * Ajustando cobertura temporal. * Delete README.md * Delete dataset_config.yaml * Delete publish.sql * Delete schema-prod.json * Delete schema-staging.json * Delete table_config.yaml * Delete table_description.txt * update * update * update * update * update * update * update * update * update dicionario br_ibge_estadic * update br_ibge_estadic * update dicionário br_ibge_estadic --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> * [dados-atualizacao] update `br_sgp_informacao.despesas_cartao_corporativo` (#1570) * update cartao corporativo * Update table_config.yaml * [dados] world_spi (#1555) * update world_spi * update * update * update_2 * update world_spi * fix: dataset_id --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Crislane Alves <58278652+crislanealves@users.noreply.github.com> * add required args * [dados-atualizacao] br_inep_indicadores_educacionais (#1566) * [dados-atualizacao] atualiza os dados para 2022; atualiza table_config, cria script em python * Update table_config.yaml * Update table_config.yaml * Update table_config.yaml * Update table_config.yaml --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --------- Co-authored-by: Gabrielle Carvalho <77730866+gabrielle-carv@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Patrick Teixeira <105399231+tricktx@users.noreply.github.com> Co-authored-by: Gabriel Pisa <61624649+folhesgabriel@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> Co-authored-by: Arthur Gusmão Co-authored-by: Crislane Alves <58278652+crislanealves@users.noreply.github.com> Co-authored-by: Fernanda Scovino Co-authored-by: Lucas Moreira <65978482+lucasnascm@users.noreply.github.com> * removing coverage from query * 
fixes in dataset and table config files creation * owner_org and exists_in_api methods * removing references to REST API and treat errors in login * adjustments in data_dict, tests for publish * initializing RemoteAPI for mutations * mutation to create a dataset * adjustments in data_dict and others * corrections and new table for publish tests * removing IDE settings from project * prevent exclusion of tmp_bases when it already exists * change filename to avoid conflict in tests * commiting notebook file as it does not rollback, despite the fact that contents are identical * stable exists method * metadata_modified as datetime * chore: remove yaml dependecy from metadata.py * chore: remove yaml dependecy from metadata.py * feat: remove more code * feat: remove more code * initial structure * chore: refactor graphql requests * chore: refactor graphql requests * chore: refactor graphql requests * chore: make publish_sql * feat: add backend class for handling interaction with graphql * chore: clean some code and comment parts where table_config are needed * chore: clean some code and comment parts where table_config are needed * chore: add dataset config query * chore: add table config query * feat: create dataset and use API metadata * chore(deps): remove unnecessary deps * chore: minor cleanup * chore: delete file * feat: fix occurences of `table_config` * feat: add API url to config init * feat: add structure for `Metadata.create` * chore: more table modifications * feat: table create using data_columns and partitioned data * feat: some refactor and finish table.create * chore: better casing * chore: better casing * chore: better logging * chore: update table.create docstring * chore: clean config files * feat: refactor table.publish and table.update * chore: make publish.sql from staging schema * feat: get partition dict from storage * chore: rename some methods * chore: update and publish only acts in prod and uses the staging table schema to generate the prod 
publish query and update schema * chore: load schema using SchemaField, remove code that depends on template * chore: refactor init process * chore: remove upload function from cli * chore: remove upload function from cli * chore: clean unused imports, redo poetry packages and release 2.0.0-b1 * chore: add a new dependencie requests-toolbelt * chore: add tomlkit and better error if columns does not have name * chore: error handling and make publish and update get info from api if existis * fix: typo in _get_columns_from_data and better infos * chore: add tomlkit * chore: error handling in case that API is off * chore: error handling in case that API is off * hotfix: chang metadata base_url * chore: get backend metadata from cloud tables * feat: bump beta version * chore: change mode in table.delete * chore: pump version * chore: no more version number on files * feat: implement external warnings and messages * feat: add csv_delimiter and allow csv_allow_jagged_rows * fix: pump bd version and add new parameters csv_delimiter and csv_allow_jagged_rows * chore: cleanup * chore: remove compressed r package * chore: refactor dependency management * chore: fix linting issues * chore: remove pylint action * fix: change install instructions * feat: pump bd version * fix: change install instructions * feat: pump bd version * feat: add new parameter csv_skip_leading_rows and setup.py * feat: pump bd version * chore: fix conflicts * chore: add `all` extra * chore: lint * feat: create branch v2.0.0 * fix: add csv delimiter to schema * feat: expand credential scope to drive and bq * chore: start cleaning tests * chore: add timeout to pypi warning --------- Co-authored-by: lucascr91 Co-authored-by: Mauricio Fagundes Co-authored-by: Gabriel Gazola Milan Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Fernanda Scovino Co-authored-by: Gabrielle Carvalho <77730866+gabrielle-carv@users.noreply.github.com> Co-authored-by: Patrick Teixeira 
<105399231+tricktx@users.noreply.github.com> Co-authored-by: Gabriel Pisa <61624649+folhesgabriel@users.noreply.github.com> Co-authored-by: Laura Amaral <100051996+laura-l-amaral@users.noreply.github.com> Co-authored-by: Arthur Gusmão Co-authored-by: Crislane Alves <58278652+crislanealves@users.noreply.github.com> Co-authored-by: Lucas Moreira <65978482+lucasnascm@users.noreply.github.com> --- .flake8 | 4 + .github/workflows/lint_python.yaml | 29 - .../metadata-validate/metadata_validate.py | 2 +- .../workflows/table-approve/table_approve.py | 2 +- .gitignore | 13 + .pre-commit-config.yaml | 54 +- Makefile | 16 - basedosdados_0.2.2.tar.gz | Bin 20413 -> 0 bytes ...ao]_br_inep_indicadores_educacionais.ipynb | 5282 ++++++++--------- .../test_table/table_description.txt | 4 - poetry.lock | 667 --- pyproject.toml | 18 - python-package/.flake8 | 7 +- python-package/README.md | 24 +- python-package/basedosdados/__init__.py | 39 +- python-package/basedosdados/__main__.py | 6 +- python-package/basedosdados/_version.py | 3 + python-package/basedosdados/_warnings.py | 124 + .../basedosdados/backend/__init__.py | 298 + python-package/basedosdados/cli/cli.py | 886 +-- .../basedosdados/configs/config.toml | 21 +- .../configs/templates/table/publish.sql | 2 +- python-package/basedosdados/constants.py | 21 +- python-package/basedosdados/download/base.py | 29 +- .../basedosdados/download/download.py | 29 +- .../basedosdados/download/metadata.py | 25 +- python-package/basedosdados/exceptions.py | 7 +- .../basedosdados/schemas/columns_schema.json | 226 + .../basedosdados/schemas/dataset_schema.json | 2127 +++++++ .../basedosdados/schemas/table_schema.json | 1071 ++++ python-package/basedosdados/upload/base.py | 235 +- .../basedosdados/upload/connection.py | 150 + python-package/basedosdados/upload/dataset.py | 213 +- .../basedosdados/upload/datatypes.py | 88 +- .../basedosdados/upload/metadata.py | 749 --- python-package/basedosdados/upload/storage.py | 80 +- 
python-package/basedosdados/upload/table.py | 1069 ++-- python-package/basedosdados/upload/utils.py | 92 + python-package/poetry.lock | 2788 ++++++--- python-package/pyproject.toml | 85 +- python-package/requirements-dev.txt | 60 - python-package/setup.py | 8 +- python-package/tests/conftest.py | 164 +- python-package/tests/sample_data/config.toml | 27 +- .../table/arquitetura_municipio.xlsx | Bin 0 -> 9457 bytes .../tests/sample_data/table/publish.sql | 2 +- .../tests/sample_data/table/publish_part.sql | 2 +- .../tests/sample_data/table/table_config.yaml | 158 - .../sample_data/table/table_config_part.yaml | 164 - .../table/table_config_part_wrong.yaml | 170 - python-package/tests/test_base.py | 77 +- python-package/tests/test_cli.py | 186 +- python-package/tests/test_dataset.py | 5 +- python-package/tests/test_datatype.py | 2 + .../tests/test_download/test_download.py | 13 +- .../tests/test_download/test_metadata.py | 83 +- python-package/tests/test_metadata.py | 8 +- .../test_metadata_api/test_metadata_api.py | 195 + .../tests/test_metadata_api/test_publish.py | 168 + python-package/tests/test_storage.py | 40 +- python-package/tests/test_table.py | 50 +- python-package/tests/test_utils.py | 96 + 62 files changed, 10522 insertions(+), 7741 deletions(-) create mode 100644 .flake8 delete mode 100644 .github/workflows/lint_python.yaml delete mode 100644 Makefile delete mode 100644 basedosdados_0.2.2.tar.gz delete mode 100644 poetry.lock delete mode 100644 pyproject.toml create mode 100644 python-package/basedosdados/_version.py create mode 100644 python-package/basedosdados/_warnings.py create mode 100644 python-package/basedosdados/backend/__init__.py create mode 100644 python-package/basedosdados/schemas/columns_schema.json create mode 100644 python-package/basedosdados/schemas/dataset_schema.json create mode 100644 python-package/basedosdados/schemas/table_schema.json create mode 100644 python-package/basedosdados/upload/connection.py delete mode 100644 
python-package/basedosdados/upload/metadata.py create mode 100644 python-package/basedosdados/upload/utils.py delete mode 100644 python-package/requirements-dev.txt create mode 100644 python-package/tests/sample_data/table/arquitetura_municipio.xlsx delete mode 100644 python-package/tests/sample_data/table/table_config.yaml delete mode 100644 python-package/tests/sample_data/table/table_config_part.yaml delete mode 100644 python-package/tests/sample_data/table/table_config_part_wrong.yaml create mode 100644 python-package/tests/test_metadata_api/test_metadata_api.py create mode 100644 python-package/tests/test_metadata_api/test_publish.py create mode 100644 python-package/tests/test_utils.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..46e07c1a1 --- /dev/null +++ b/.flake8 @@ -0,0 +1,4 @@ +[flake8] + select = C,E,F,W,B,B950 + extend-ignore = E501 + max-line-length = 88 \ No newline at end of file diff --git a/.github/workflows/lint_python.yaml b/.github/workflows/lint_python.yaml deleted file mode 100644 index 85316a306..000000000 --- a/.github/workflows/lint_python.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Lint Python - -on: - pull_request: - -jobs: - docker_lint: - name: Lint Python - runs-on: ubuntu-latest - defaults: - run: - working-directory: ./python-package - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: "3.9.x" - - name: Install requirements - run: | - python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt - - name: Lint python-package - uses: gabriel-milan/action-pylint@v1 - with: - GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} - path: "python-package/basedosdados/" - fail: true - pr-message: true \ No newline at end of file diff --git a/.github/workflows/metadata-validate/metadata_validate.py b/.github/workflows/metadata-validate/metadata_validate.py index 3a22f3c5e..39be6bca2 100644 --- 
a/.github/workflows/metadata-validate/metadata_validate.py +++ b/.github/workflows/metadata-validate/metadata_validate.py @@ -8,7 +8,7 @@ import yaml from basedosdados import Dataset, Storage from basedosdados.upload.base import Base -from basedosdados.upload.metadata import Metadata +from basedosdados.upload.metadata import Metadata # TODO: deprecate def tprint(title=""): diff --git a/.github/workflows/table-approve/table_approve.py b/.github/workflows/table-approve/table_approve.py index 152a013c8..0ff715bcd 100644 --- a/.github/workflows/table-approve/table_approve.py +++ b/.github/workflows/table-approve/table_approve.py @@ -10,7 +10,7 @@ import yaml from basedosdados import Dataset, Storage from basedosdados.upload.base import Base -from basedosdados.upload.metadata import Metadata +from basedosdados.upload.metadata import Metadata # TODO: deprecate def tprint(title=""): diff --git a/.gitignore b/.gitignore index 1b39a0b23..1cef68eb1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,13 @@ .mais bases/pytest/* bases/test/ +test/* +test.py + + +.DS_Storage +*/*/.DS_Storage + # NEW repo name .mais @@ -144,6 +151,12 @@ venv.bak/ .spyderproject .spyproject +# VS Code project settings +.vscode/ + +# Pycharm project settings +.idea/ + # Rope project settings .ropeproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f31bfc60..302a2e1aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,27 +1,41 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: check-added-large-files - id: check-merge-conflict - # - id: check-yaml - id: detect-private-key - # - id: end-of-file-fixer - # - id: no-commit-to-branch - # args: [-b, main] - # - id: trailing-whitespace -- repo: local + - id: fix-byte-order-marker + - id: no-commit-to-branch + - id: trailing-whitespace + +- repo: https://github.com/psf/black + rev: 22.12.0 hooks: - - id: pylint - name: pylint - entry: pylint - language: 
system - types: [python] - args: - - "--rcfile=.pylintrc" - exclude: .github/ -# - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks -# rev: v2.3.0 -# hooks: -# - id: pretty-format-yaml -# args: [--autofix, --indent, '2'] + - id: black + language_version: python3.10 + +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + +- repo: https://github.com/returntocorp/semgrep + rev: v1.30.0 + hooks: + - id: semgrep + language: python + args: [ + "--error", + "--config", + "auto", + "--exclude-rule", + "python.lang.security.audit.subprocess-shell-true.subprocess-shell-true", + "--exclude-rule", + "yaml.github-actions.security.third-party-action-not-pinned-to-commit-sha.third-party-action-not-pinned-to-commit-sha", + ] diff --git a/Makefile b/Makefile deleted file mode 100644 index ce48bcd0b..000000000 --- a/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -.PHONY: create-env update-env - -REPO=$(shell basename $(CURDIR)) - -create-env: - python3 -m venv .$(REPO); - . .$(REPO)/bin/activate; \ - pip3 install --upgrade -r python-package/requirements-dev.txt; \ - python python-package/setup.py develop; - -update-env: - . 
.$(REPO)/bin/activate; \ - pip3 install --upgrade -r python-package/requirements-dev.txt; - -attach-kernel: - python -m ipykernel install --user --name=$(REPO); diff --git a/basedosdados_0.2.2.tar.gz b/basedosdados_0.2.2.tar.gz deleted file mode 100644 index 45533163abab2cb8228efa47346f43f9387fd5b9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20413 zcmV(>K-j+@iwFP!000002JO9Da~ns}DClQK@ITZ*<3nHuY?2@;&2TMwXi2ttK3UR7 zX=cu>(;|6MPnovYA)B3fPk{jb*X|vag}Sk20NN-zyu?_$2ZT|_j(DvTm?qRF5d?2S;l*f}*bNnRBQ<)@Ez4t)!EHZ7p?`Zb7 z_aE&3dHdkO{aqMMXLI9;RHHne7lcs9DWF*lSdfpykrYKHimBA77&%ltet9B3n-;}f z9r?H$OyXj?7zS~+1Hh@B`9(jMW0}izB>Uk!-g(ps1R^Uy7mvb1ii>QKi+P@XFGodJ z{IHPug}_HeID|2Fg$mE0E_1Mbd*JLIyn-5ef{P3pEfs6vv(I02_sHoRkUYe{x-_a zaUEZu{`G0czJHz-(BcEwX8W5PC$o8$7wSmN7kQqG_M?Ze0r!K0PFF=o5B7pRcmxxcyQSj72SE?sjFOms_kzP<7k8glJgulwlYZ;vIJJ1gy}?&lZ%`KSn^%scjVd`EB%}Y z5e@C6^z;mk|9Eg(6y z>le?SJUy*%&AUDLo8JG=kH2~P>gDm{r?1UKhMrp(@S{3`)JX_5B!;9tb z)I5ocrq@a)wMpVP#V{#99;y9!wchgGOMmFo3 zZRNjX2?aoA1S!8Iu@L|SDJYMcUd_V!lE?kgG;ew}PZp!MO)qj8Ek?5G&3Tx-?Ui4U zWZCt%yZ;h}z;1v$rs+jEgx^gs;&hzlv&J_Hh7;!DsF3-R_bkwsU$XGnO&7DF%mI}O zS(#rg1$W^uP=!Tx;r!Q;ENVFAzt8172~la0R4|OB(r)YXR@39rRF2;E!+f&*Wwt2h zi%Q2@sMZVcE>Z6~cg)_u8~=B@^Rx7UOx}YB50=Y+yStYE+wb+@`GbSq2cL+;JO1ys z*Pr+@!$Ab6x5EGL?d|P5{-5rDdiw{xJO2Mx`2Q_o73+iqLe1qU9>*i$5CIk6U&NFD zL{4wx-WL2{oXqBlL=C?ZpiUPkt6qqq6uF#bXEF-(u>QiFHGOrbqtW^b!&7Z+=qE=<=25%V z%!aqRI6bKARConaA8V_#LEYt18qTC>Ze^RsV}P6k<1)i$#qpBzA{!#l9#jXS_2Fec z+#mQx`Bku_`Omh+c#+cgHo(yly=Qma!f%59ICMg9iuqf&TC9 z9vs}!|68K}c<&Sz@h}G2_d=Y<#Z+7p8e9nwr-zCjfy@Bp8tL|{|9mD;L|TLsX$X?1 zP zMUl-yv;|0*B`_K+ST|~_y1u}LKxf$25Sm8C5iB??5YFyc05VCXLNpB+E*Q-uX;PtO z-D;e|9Fx_-S)3P(Fu}Iio&ZY1tjST9V?mf;vNu?yfVm2{Q4W6r%A6S>$G3b5)sXM$ z5|)`=01V4wG)36HbqZIUG${S8QKRB-%@_@7RLH5p#>xm>-U&Eg2A2sB6PgBp4Avk* zGj-)2sXa2J!1iDii=@C7czB8*4|ANK+=6eKv0wt&$jw2S_zYI;Oe*&tl}6Q}p@tZr z6&W^#-J=BqGPqco$1n(>;sC1+0w_K}oCkE!6YH^COa@+&f=1(jE-MCB+l(w+XAlh9 
ztPIsZ#Xo=!B1y>Rc3a2ke1Y7t)~>?b;~urs4UGw`9h8AET41A-jREtt<%MB0@hdHi zE^B*;30uU;7+-I}Qsj$Nd|TEn(`+G=K9&;gS|r)uB$8JSh=-ibi;D_SL~neMaF3Z@ ziK&G-;$37mQ6Rnm$|lDl-~qNY@c9ruVfqFll}Lj#!MZ$#0`{35l|uZXMqs-PXx?$t z17H#;jRiHA#Ue+BnN9?48Z+pEND{*qsnF|on9MR&bZA?T#ks`8i!mP?!v#Ws193b? zGN%9;)i@j#S&n8_x|Obv>oedpDkQ+$^-O^4n-sCQkf}91a^)&}f}nES!j4)=iN{2x z>29fsZf(OfJmI~#c})FLF(4A)&=7_w@lJieXYMYOIi~ z{?)-o$C#0!-oUY1zy`R~zwe=J=Qg9>I&s8g-r*(*eEx=M;pKQ7>6-dQ9D!748rB?! zGJFM!oQ2f%`qAKI)abUX-Eq&^O^7 zZ7bS>;}{N1I~*RI9ZPv@`b5q)9pbd7A)ol#deE{SrT!{iF7L!qMi88)WZerJVmlAd zckm1H5oT(xHa70Pe(~hR5o~}k7aj#2{?MO>s$XPpB`~sdVhOyTfl>-jOtMdfV-WC; z&rZ`2zJWr(x3UQ|R*W1GQgX=saC2)y7Vm2hGKAq|OG5O}B{v?3OIBQ@&1WLonIv!j}e~>0xC}Clw|LtJSy~SZ2Q#?A*%@D0h!m?JXV`-mw8M z;&HpF>xbgY<7cm)>e_;jzoR2M|Jp5Ne5Sx{rCSA^NTGPC&!jJO<0rtJ|5bC$R@@zw zPo)8I48U<`0jd{;xT-+PhYw1swP6zLM5TfA;c|edqKC%33Xw*|5pD6X@R_hM!5iEQ zeS*u#v;a~OR^OxQ+P>FR!~VGk*C`!c=P?Wr8-?cRc-6&_nZ%Jyv-AEeOfUKb6D+(w z_JMesb6;Pj!m3$BxXuUA6v+iH_M10)@?aG0N{=v}|DyuMBhg#3riLj+#+8Q?h?l@q z!x)X|qq{v4gU`%T;8-68pZSsn{B&}~qk(vF3gwp<*rFAjO2-Y0L3O#e7%E;f9B&y{ zi2bF7;AN;`3(vBdrQhj++uQSYwL)jv990VrEX!;J93PeIO9K09&$i8zFkU1H%>-fy zA|CQ33)ya6Ry(^2E~(1Zx-NQRRUgc0QT^ppsJC9>=@kUutQeZ%6L0Q?jY-;-E<0<8 zi9ci%$++jsN-I~eGo?a$gLKD&Nn=701U4^n?6yV^0Yvob z0N`hyEhbYfjUerME9IQ^J}Jx_U%4nedPunRZc?#F&31vzfJO1cV!B!8UG*;~ zv9%dPEP^t{DPia4A33P=Qy%`vR)2eUb(s;9P;$G0kxjRE;eU?KX#)Sbnl7#Q97V1=%plo*~C zopmZS_q>_?k-N$5vNFb3*M+F|S4Qt&=jsty`~^fco-Y!u`voR3k5S)A3e#fBN_uIo zA&h#Q`Gx6%pwruqwc=Whf^232=UienRL5oJ|3d5GNGHPTNUd41%Ky5G1>3QVR$T>o zbS)iZMP(M?e#MO|j_ut9vsCu7IJ9aeePIE(iF-HB7Am=5ZM@winlu&7w;_Re4dNQ9 zZRrxg-hB|B(7F){XuUr%h~5QARZEZWoD5G(^J`F3Dm=~E^$Z$Es>l<8&uL6}v&8Rl zVh9ZkS+iC3Qi>BoLfH3oM-s*1B+UqO^c;q0lYoW>iJYmr5|Rm3(4aN*^i<7Cs>R$) z8A+$Bl~elrww~N=PccQU(e_07EK3}^0kV}Zx=1jNm=`gUxGxYBzOk%a5|M&o-dFSf zBs=RXH4m~;mZKkL3}P=TF%({Oh04<+{IG~eZ(&boZ%bXS(=J)XP}dF#Cm2eP~it*vodpFb~E)Ib^Z3k2b2n$6--9K{*%i%F)kdKF`WY%6&~n1yd8NNm7% z&6A8(x=B3DLqNL81RwQGVLbrTcv*d7JXLX@#p!}+cbEcGr()HBGQQHJKE%ar2IL9m 
z+Kvra`^5~{Ecmxw9q=;MQS&DagM4P(UA?|}wCzx;_B(h@Q;WK%G6;0Zh$;!Pa|;X!w}X7(8@lG*fuN%QECbZghYB5eBxLEOaP-=(nqk=2v+LEOPO;h~+VbKUktCf0 zdsLvXRRX0eT8I?UxV07&p5{5MoUV_f)y%GsmptxjCmrIvk@05!OKg7(#{2Ksl&UbY zHR}b(d1sQQvXeO>k`0?Q5ahtaERrP3^DP)-J?Ka)9 zQX^Rkd@o#PE1q{r!i&KJJ1HG2>{WnIOX4^P{&HE)5YK;WH9*is#s(K$_ZHm0_qWfFl+ ztOPx4R(?=oDiaK|(Y3=XnHJ!D*Z6;De?ez@OqUrlR59vJGj|0+FzF?M4rX|@hJiD1 z?o;0DQFsAdz!>(9=1G01T|e69g`8;WHZKiAheuK|PK1^YXpHP0g7fJRy4sRi5opQI z_gG(88&q%`#hYEasKyBvPxyKtMLA$RKmaipN2Wm#`*2~)CN1I*tY-OxcR5m9XY2Ly!!rL!G?eb<~jbx=V+f1Pg4Cjh>B2O zi-Yu%CymK(W{ymq!qwZnn4<(6vzgZ-D?a5?tB*9n|D@l#+ zoz#R{O4hCDS>l^lSYVk(B@vrgay8FVj7$Oz6u3V@Vk`rm0V*=q64A!1KyADch!dzf zB88uS`^R#*2Lkb}(zb+F4SYLL0$wqbLgaJ=3l&sNo@ZnNMfOg$y@+%yAah16gUF)o zzS-3LNVL){l`U+qt-T~BzacQ7%N9S9=WDzK~CjN zq9Yj7YaI3zP)*N(SC~;);R;A^g;o5cG&Wv@NoWv+nhBo3bfOCv@r>nL-yW-E14#Sc zU|_StAqd1vLsFIAGDfH6QCO!l@C=|dhp_^4bwe6kX?VeElMo$-#rLFZC}m>3xE*Q= z5zRLkuvT&=3AY~&>(c8|(R#ID{mnnNzc_jJ?Bw}Z+b2(qXkpDT7=gp8y=XM#avB&T zg)SGTKR}v>0WF3o3V7%A?4qw9La#m$6_W{1_{BSq-QFI_s`z8~z_zcCF;c_9!3Rt? 
zvElK^*9g`o(jNEjGy)ON0JcJabZ5>(?ivmER+M*l@-RZ(JXXQ|UmZg#0*zZg7qI1H z=B8j{of6!R;qr|k?b%0VPzRvSmX3*-m79)h8HByc%nCLN$pjFix(QW@aK8@-&9j7N z7g*1;N+AFMGxJd=uYT9G%A)Q96#_;v=>63rOK=yAukav}V7^G#}G~ReV z!fHzj5U8{|6Ad1HW~4xI@ez?w(>xqxoshi?qF zu4kuO>+BeBR%z^fNIvHXZbwI7n$o15(^gyd?PC)Opq`a09);d|K=C3>Qh>mSm>(8 zUcb^T!NArXz=BN{6s-|x4c&`q$sbKq@tNx;E`pYK>+31XI^N@wF2^ZsVAwZOCDt#v zSE&;3X=M~JBq&#g_WG7JBo#BX6`EqAn~)n)$l z(A?Nb7@_M1dL!V>%m71BUqLz~D>@R;rj_zldUaq+Z_xc~V5jb~&|NjCM!g&E4biC8 z^fXOnC97fU{7Z7jFvVJ4jr`Tr<3J@yuWczvSJX6^xT(6l-3`9$y7DYc`a_Ue>nc+0 z)-2XM&k(b!<{TTVcDltRdP8C(US6>Xb#&y*P;C;uP>;=3e~+cRVv*vlHJr*SHj_S6 zld4_RYa^LfZB6K|JQD3o3&2&U)o9THt?K7-dKME3Fd%CTF^*L@L6&^+OtgE=b(*!%!hC8J~!@rrF> z^IL#A<8l11ZcX@+4@E?eYaNKqB$$TPq5$%LdLk^U@u4A8PXM2w6WpC;_vweli$k?Zn2zMkNlJ z^AIyP&^>)asG8atz!&<%c*z^}L$c%z4uoc#?(6U3u>4Zuzwi~u#|;3pCjQsK-rj+U z|8jWv0ONn{?cKkN|9dMR;x#r4TG(1alKaTYSkRaJ(qF+vT`zl0A*!*X6Az*l6e~T3FWuv~cGuco4Y5GvH#3 zGueDGOyUtVOLngr_S_W%j;TzZhiI8Vkz+VBPb61tDIJT!IPoou%D~B8{D95Pfgto) z0oD22wVVau33{>^kp;PiJXJZoQR2a^M@^ z;>=;hMSC61GA&i$y>7KDBA{wH0c|vke*V{I&kSW9x1B3!3Os#74JT(bv<97_XatWz zP|(h@aSGeNSmrfyB($nfI09AoM_mO*#TdC>bh%HeB|eOGP2Gq`o`dRjyCU05eJQo) z8^7X8DqhKE6ZP7|3LoBZb{SLrjg2bh-ZB&tagh!rUtHT_o7+1s{@2g{o7iU`RRCDc z|0CzWU&;Tow|}sA$N%3>{Kw4={*O)4>z<|T@?fIA;SB)UD2Llh;`wdO{cJmaRIo<3 zKcnif{&vA;EzX*@&!l)p13Xv;0rEwm7YKr48Rpqp9I-We3S9D*4eE%jR!YzjQ&6^~ zYgr@N^C_l1m;m&S@eW8Lv)y*K0L5=&w$H)+FdTn%kIP2EC&#?AZG zvbwoU$Y_ifmLq0z2sAUJXfA4OLH{Y&HQxDJ4OE;uMpl`OX!^iHr$Jv~K}$ zvROhZZ>u5JYg7gT;huQLqYhjSZwgd1o3R*Te3)`K(VHbc+CH(Iw0%NojC>^$5d6_D z0N0~9Z=+%MJRbJvAXbjTd0d2KVc4-%@uppS*kL$_S5Kckef(Pdf!=+2`r;c=HJvS# z?8`*PqfEkYnJQ>Ejba#G#^mqor>9Q^hY9QL_IByv@$)CbMeGu9wqCz@v(>UdnB%f- zT>TtWU32|H>S$YoL;_QdrWfVHT$7-3qfC`B?9fW+TT8mSDldm8xw#yi)$~51mdB@7 znNxt;jkmc%gsi=@IYmEx^y%iR(0zP^T`Mw1S4qm0BV3N1O;6->L&pEvdG^^b)V_8H&#uFZ zTymgH|3V(kfZT;V_jF~!s|acAK_~v3s61z9Nq`hBqc9#Z-`LFONi3sg=t0_Nk)7fe zI7+T#At4pvm_GvtR-@qN1NQKt_}vQ7h8fHc1?yZ5+ZR}_0TMg0s@#e`xB~Hbnq_#U 
zn97E^{M}6_(*S3wSScKeCTI=B%*HA*4K&=X>BuBkgUb9n)ZyWC1nspBt~%-u_1Sz2 zSDKA-45Jc-X>TqO`MdsP=qNuOCKOzu9u~<`N_ZJlvgsDyvrS}v|Dfg7tzEL3Ouznh zs(D7c81}LrQRQ9{A6_wa0jl$#Hd!kCMX|Z?C0(ZyRyJbvv(V-xb zdfPBaE+&N^3*U&Wxi&#!$9e*Krh7;iSk*Qlxs%N$;CBUYuV`TL9p z1cOD+=aI*lRCy9+bmC!93h2pCvw47lAZ(GBCtrvidJO+9l5iAeKmqnZN=Eg6nICeT z&Y;Tb#KVc~V|%v0fv!)L{`-rQ=gTUYZmU(ZIaPh}yfzfvKLD`*h>AWt!6fL1!sC`g zj!SB=zil!KTHXjfiBQw$+h?%J#D&>pHYhrI36p0`SKuj!dmuQ~rc!QgjfPcw;dDw5 zi7+?-fTy51SB3|*FP!I2n>F^1VTFQSwqR#3oleKAfwX2t1wQ{bO>pwlC$CnOKE^>`GZ^w)Hxu@irs+CqHcweZDkl38I-2f8A4 z)o#(BR%gHrh^FRMA&Cw1VD1I*fvqGp3Rp0FWCV3uedcao*`8^Zaw9DVG zEB@Xqh3KpWN`?Yl<{IQ)m%J5n~*sa zhFBlSCP*Da7; zKJ)&~v)oqh|1c7Kf0V>1AfDdN{U5yBzhAlk>+K%i<^Q~$`@g#JAE}9kFfABI5q%`T ze*OBTNjaqg+%m(cuYcF%0GUSfjJM+&|5{%Wn*ht!?s)zO^xJaUl$BwTlhn~N*$PUm zSC&t9KrG(b-w6p8%w*eQ%`TFaz6A?fn1{eJ?6ys#T?@13&N1)$(a~2=Uu!13(uC%~ z>8=F{(iXaeZ>{0WxlNtSz3sgy(ZCi{D$A7L`u?BEB4lCn_U`{-|6BWCpaPWtalf~J zxBqV`|Klqe<|7V1724cmAoabK7w1_X`4M!fWFI#2%)I4WPT*P|N)GZHb zAHZo1$Imnuv>jGm5an_X47TitNz%+n2Ri0WHLLHMJWwW^0>j&Gg_DI2pHO`xqs1uv z>Hq%e|H@ph7j1dGQ7&7ek=`6?YcNMl+nTUg3fd|+qUuYiecozscb7OrdZky@#<(k|{C7Sc;Qfm|7rF};2Us^FdN^Y7rKg&%yK`BasjWC7HdzBx#6|Lvmx7vv4vgI)s&z5MoSt5~ z+t7`9WWr1p=`#FehC&9D0Ppw0D5Cf^HWN*YV_Ttv**u8kd;!N$)?)PqOC8Wyti-@8 zH&~Ido2dzn#L|=~R-;hUvs!YwVVuG?%+6IK2}2B^n-DY|_QuXvH~tJP3qmFukf`mC zx@Cf#G!!fm5I4KdezrnE$}|Kifu$qU?A#HA209}A)qCjuaCE`YaCQ-t1<(`AIwE(a zv4{geT(o^0vx6|?%8QZFOlHiT91)p#jDmCZ(%Q83drG6x70-{IhP&StmDgUYo{|6h z#=XKT`tVD2+{ms>w=TO8EDQy?b>0jtqdKxz;|!?_RU(bL3??FJ{}=4>-&f9o(m~Kr z5$R%i2^$LKc+^P~CD`s1k0^Y_kzp6J)LeL)oP+-glHe(PxkFD4_~G4|cZ1c;MS1s6 z%wF#6X)5o1yy7ukc8mdjiAtgTAhh_fhS&*SulJ#IjJ`ud>Kc}Gqc)PN3%XnCt}(1} zv}zTvV-eM-QcX0+>Ik}8~)<+r_`n}^o4Hu_h(9f<*IHn8~P5gup1=mTl zf~uHq@Mp)lz*)31H&y7KSHiO2J`KMq0bh#AONjWjvD%!OK8;<5)EgE2TN>lTqm_+i zJ|bXk1rcfS`&I}Q4uQo zKYRXsOafKyx; zuMGbcnr`tGKl&(~lQ*_C9;49Y2KqpJgLktWD}N|4!m7LaKqsfNCrc32+?||jy0LCx z7j-)+s&0}P*IZ7J!8TT&1lX22(4@MVX`-f-n;c>l^*lMo`){M&V5h4eBe8~B@t7Zr zKZq|LvUrWDN5)M+eE^^upM}6s2T7 
z&$Dnewbpxh+HZ5L5)hd5$%px57UXMqK?X#t&dqrk=VbFY@UxT$pPBPir!^0}W~kg= zgFz%Gk&RQAIBO_%$d#zpK(EQEA%)TEK+1j_-)C`JZUUbG<)uGbT4R^lKpc-oa?Z82 zZ!1Q7867sYw|U)LF@dIzX*d>jkS#qvYbv4x-2r% zf#U>V&f5$P2Zw&xoD%x-_4oNyI}cFv9`B_e`OKH_h^4lLYjX3@k6)gA(e=zL_o`Ns z&3+q7`)xGXD%KevnW~E!u{n5kQ7vOH*tLGR`rfxJZS*NH{W1efzcx2=>+Vvr0)p-D z{3`d{*PicVNju8>O&g))gXwOj4}leZFXw_rue@G^TOLBQlp(_aC}=Fvro0Cid&(uC9^ zdCQ|DP@8BiG!`E04A|Yd%z34+#Ea+lO-a#%Xb8d&lz7=XCX#(R6$A4_qvq$ROf63Nnonl089rN zf8=j|F{H9_|I^}S{{L`yZ+Cwm_W$nP{ogHp zf-HSjjgXYYL-zKbznzfLq>bsiE`BT-9J=t&D}t1AxY1fv>FZXk#GAHZQAbI9bOitO zr+e*IIr}b`vIUZuO$$cGdZG1qIaHV(QY0%hf+0HDC)eZ*L z^|}RG*e(My?X_EvQE^sW1I8z@8Uep_M8?eUC74WhibeS4@c5 zvTL?^{}f@U;YjG&7-h4aW=Ng^ODUt1^hhN1El#O0W*rLC3#zmY6}BtYp38H^ z7nMtv0eV&QtUy6^xQL0%!*)Zo7n0lIc4`}drni@M^LH4lI-bv6Cph}x93O}CIW_t9 z|M>eP`f{{)Iy--y4U^>J@IPLD@xQw^{*f(SFNPjoou8iv7jSsOLmunS7Yw<^(=`aA zgD$bMJA;NhUL&g7GglGi-OthS#HgcNfp)FJ7;V?|5bVDHK**<4au%aH8}@x13LxfRFNUb#do9x`>;st>@<$43D$cUePyZ85?d10u z7&t+Fd6@rB{Pce+aVCGnGz6JfaFXGCLz@xE(lS9}l8f(u`llFYvB-vSs-K0p?-%q? 
zssjI=nKV`zRSVMvumYWkUeX(vDNi#9N)bw8&#P zta44kMnCMf)iX_m=^GLdIRV!g)5& zkeq(Ah(egSP;G7;4}qX9W}!Giz_E{Sb@GUx{ui`+qfvLn0Dlc&B(p44K$5XDBGsY5 z5VUv=#}-#V{m*p7s}Hn$xR`X|1rH~NX3;uUVDwzdGQ-}LTo&Wv@sb0mXCWd;Sin?( zkU7I}zl=-JGSp2?`xa}Uw5^VaLb!$C;9cMI3_}P}KL8NmD+s(|9G+!)7f(fr3=i}` zL#360mgPG@wFDDd&{h@%UPdn0$aP$w{}^JSL5^$$A0n3T>; zwSaN`^iP<-G~C=6XvrGRkX(kCpB7~DKKJ0<2Uo6E0i!WTT)qt*z6I{6)kW3aTVvlf zFkqd)LWT+Lrbq%)5%JRDFTz-#NNjH0q_oEFY({7@h~!z3WeHkFQ6otop1^-|)$R-g zvK0t$3`Zu^mU!MuzY?h*pkyHG#L3Po?T7_pd1KATxc(_(2Sv)HO@=r(%wXLk88hcZ z-0}!n03?dvYF^x}w&BTf^3lwwkUP`GfBb!}{`QY|2OUI0s)s^`XfG8qG*9mE<8dDI z4j{yb1s1S%0Vy{(>>GZ-I}X{hmy0A3r#eDI`y~)GRP4n+YM#nYy+x^c4qQoGph3Gl zGiK+bFzZKQe}+V36t+lt$soajARcG4SzL7K`^$xz5<3}7?*9}};#7tS-WCB8%naS} zNLQ%3WFdmH0Xr6veZb=wRGL~3`yBeGh1}eDAz(8WizG8FDl~w`sD@C6mQEtt&%z@y z_?9OBJ28t>lqmIhRgez^Q->^{Q*)*%^25r-;LG?OP}_UyVkUDaplJl{pkv^?BN!|4 z)9zek3WOTsGvKvzdW$e>N{{e=gX}t-gn2v(qj$d(KeNuPLL}0ipP>(A@*X^RuvGuM zYxF+{5B3iZ4?e-t_YXc1hj;wn?ehP){wRt*(W0o!{J2fYA%oNCXW;+$db^hYzkhIe z2>k!y{X72uc0QkquVgCOK5uvdhwHnGiA;M(NbbngFN!G`$TUPH{-;8xVxq@UHX@rB zHiC&$ak@iEa1GJ~AX2=6La#68@{$nf3Lc3{IRZs&sg@1k9^8wAdlzh&MK_IJN=@-t2OO z6^})8RUrSU>kO{Aw&ykh^-m0Y%m<6D!|7WA^f?~kDF(;`b{1G$a#A=aA2>GRgfj4K zbEBT%m7{@UwOAQPHK+_rTjD70!3NzA#EW!;j*hnkwLybN23(C`gf*Gn!>)Kq#2nK>4^{V1jU09 zPIP6Cdoou7R62Hkj&&~JnAUF{HpI?Fk(EYzf6lxawWsb;crYbtd$o=5-*VD$>THX zOC2@0fX-;H6>?3(iO%vR#q&sc#o?F63|vv;UT{flvhgcG8+*X-#oX)?pgyZIHQSr` zZM~be@g|@kmVK*<8uC0e>9h>UZrhFt-Xht9oe6 zo{G>U0D-`ZrLA*oZA|g?2y=Yx{kc;sqa(FoRu0rHcf~ZD#iKZivwoB%nab)_tdpgw zWT5A)Oc@$HCK=|bGHE9!6MWF?=uoN%xM}oWX}6J6C+)zrh|>jc-!O%9RK==>7v8zo zFtEjJ1_u#Lt{su;!$b7sT+G_lVRjq57nXg}Fvw@lKiKxn!)%9Awb2%xrV~%kG60nA z0>kLR^g-=EQcao7a4c=Qkl^?Yttg8o(s&76-H8rA8~;zV>Z5W0>*K#ZIH=oy9^To1 z{zmwJ!ln}UzvKy?jtx)Yh;?!T<)~E^PNd4NuH%X{$50U&m@TLa3;}~NMjABue9+cG zbiOp!a-EJ%7+5aR81jppXmzt+g8v%!u%?TitxKj^T<8B>Kb@}>(9=0J7W+sU}C+APSR!1&q|Svg>g;z$``fgp^)zinPQ?EA7qR3SC@qDNE~%mAxsk-p-(7KsPn0`7{cO>*F9`L zX*Un0K^~(dR03OZE)ybXAXv!~ItRhkC0eA-Bt~lS(mOgy-J!hG33yEy!T_VVne0NT 
zhXcI*rTU2DTF=XBKa+v{IhR05SMNC+HzMdBG836QjX0 za+>p`s^Xk3{oSQ6#*nPcJDpd1iaYYmC$Q_~e4UYu5(duAuBmMkzzm*RBizvJhNj!%0P3yzlAYC7n3tVdo zX=4Bc+RSK;&Vv*b{b*QMKQwAYRx3ol`N#GbC(oXpJpXF@_)K+7>eo!sHJKEpDSjb06JV+n(`QI=i&Jdj7&F8v7j_xd(WXF zWzqb(fGl%JHv^Zlds=uM9rJXCZYN+7)Z1V+oM;lZ8$?>C|UBN@cg>Y3l>-hT#SPz=ml zTwE~m)%S(^w$O0eOyTfS76St4lPBx^$GC4$B5#MuEK@~?oRh}l9ODy*tQp$eF!!>7;A?#aTSARP z3w9w;fn$Q3%@xp~ zz}OR@*#zP)%Cg!J!s+mt2$ZEWn|uv=T22tyc=5UhGFs)LbOl_(D?gYut5jl}_*>iJ zSStgqxH0-i*2j?iTahe7z$nIAYg9@juONsswDAl~=wIWgW6exTPpx6a<_4e^3Y)Q| z84)@X!BbX1B8I7wT8nyeA;MHXf-O9uA24SwbeR9bDr9mZKxABm@fJ;FyLNejcy?i# zt~z9!%F9hIM?&iYx+r&Jd^mm0Z;_yGaKikMqtIb7hZ7oq_ZP@cG z?vS|rYyk(ITLk{eeWO?WntIz*#@5Dge|>yplsW(baqnkxSXQGLgDa;-(TZ=>-x>T2 z7exj~jJ_rrBKmPCI8-TOV^sv?lL`6P(Oa}%0DMHkR3L3)7o8Bp&kYy62Uw?9CO3R+ zoW*&uKpLiLPYUY_I|S9akD(OU!5qS7C=Ws zbrVW<9hzk?VNe^1cQCIjs$aV&Y2VPy`_*bN&jIVv8T3DQ~1hdIt~ z?x}08PlQW~h@a1PiLS(O9Od}4I25f>#S2^4D2laLPLW*E0XA7aITlB=ByqG(;TOV* z97npIeqpSjtmaP{nq5+%pNR7ia}-cEU=`0u@5jdlW_Ot>eCPtQ_R*d`?31%3c3Q&}3xb-IbT1 zvQ1587?}o`K)Me5A)J{;FIs}2OcO~*2^%*c$GCXZX3L%>=2$A8Bw>Fsrr0sZk6T^g z47C14kT^Ik1Wll?Zg#1uS1Dev@V=t_S0PpNi1jAMBz!CY%2%6>j z;RU6z_rNjhUtr?x(v_m2AIj$yFMRL}YF14*b(wLDfv?RFct95Z^ z4{7jbd}WSt%-Z+SBL>y?zm^leG<7$FRq>sm??vX}8S!KE1J#$C&16kwLWlwzQmUO%q@n=z&#awuYz8d!T4H7Q#| zL9&YTFYRD}-Hpj$u&<2nMB~Uq&W#tsZ50N~V%Y=AFT2dWcnRH-g z3>c-(ShDfBt9hVM+r@9N$19t5;bW@3MU&RMEOFjcw+%RRb%~y3TK6!EIc=#<0|8TD zEdf(+ACPekjPrBxHws}rugng#>Jm>_i`d$n1>%W^P?Y4P;MlxXh4x7;OC173XbKar z*P63Ivum0-!Hm=q12vTFZAJ$xtmn2Ik5Mi*GYSvkD7+wn+uGZOwt1%$@$@U+nEL96 zA3_M$#edrE-M9Lm{Ra;){?p#U;obiK74Cnh0P+by13r~0!IA<@ z5}xQdL{7z0-SZ9A+b&D|QYp<9K>@+Hq7gCxrY;_vjLc-~OI$>Yt-V9wwdO%2fggsQ z(_(SJCZdmxkm41-S`yvCgjq7?#5x`oni?=bP4uWfCF|J0Wz*!3r`5UhtNyq5|3*Y% zMAk|BKAZ(yNB{Tk?|bzBKJtGL?&$w-LjPaGrYoQ9SKlzb|B7A5D8VJ`8b~W57&e0Itc^-Qn^pq5mI=`(IE0_YV&%^8dm8JNf@N#Q)dusm%R1zOM29=7yq}5=~kn z3e?)7iy%k#*M+pO)OC5e756^3E>UdW@&R*G8AhvOS4uF=3%kl)xOt(MA3Jwq&GqSj z5-TSu(jG z{}y}P0Z>6GNBkRJXfx0x9wB2mj3;aeG0w@@37w8ih$jqhf+h_LfujhlY)@nT_g)of 
z%Y;Vh@=PY#yn0ty>Af9bPhIryGa>Sp#Hqt_#AdSvjK_pS-K$G>5JaPmbF|NJVRk62 zl7Tu9-=G602Po67yrq=$wQyfm%N09R8{s*U<2aReGHUUp}huoOCM%#&~=8zRwG7u%Y^xM;G!aXXLe)BkubKWg-c_4XeRb`PrYA9}lY z{Qs|!{$r{(waqwW4glJPj_li?OzdA`a!B?{%kmOyKh?fnBA=>d)EEkJiUl-})UhIG z;RI<~E{GaTi5~gnH0L@ca_hx&velKuRLSwEMo3?mVuhB|13=?#0IJh$sEEGTT+Z}z zbi|%P_BYPmcY2bpIBftMCh7*YYr$o#4P`!%X4Uq;F-N>T`~TiSE&dB^|2zKg*SP=d z_W#zr)e->z3DW@fNk?;RWUdknR7j!zWnm3^J)>ivNSLxd6kXY#v}P2Y&hT&mKa(Ulqb83tliJKcu^^J`_%PtpYZ`zO!6ik(3B7(StVhyY+92v@3 z)VpLCI`Cr0Js}4L>JxZygvPSt_{_y;Y_mw_EZ0dmBi~?-#)v!F#WGk?n(W#;9Jk=~ z$*WaGPtn)d8GrxQMS|M-uWsG`d++c;P5;yD?cSaLzrypsX8*00*ng`f_TNufi9rO% z_>G-J{*hlH^NlAyy2~v+<(Hh+@Cc7ebJMG3iptT+XL=z1MmioH^GAmg=PVDL;4011 z?K4{j_4|1|!9pP%p?RLwLWw=kv{oi)zAbgLQjd9LU6V0I>gCeJ`3p|d+I#C$PK;t^_JFj-HA z{hH_qQ3lMx8L0u)sz86+8g8k={1-+G$`ou2tukcBqu-KgAjO_!+>jm;c+R z|GWDK_wVw5|BKci|HbHcJGZ?45AGlC-|vCo5Ar{J?$-Zbz5e)RdkDgWz106%_`m&q zivQy8|HH%myZ8^c@<9~}IxR~!b)z%NAi&WPe(U4ccI%5!!69!4xFT`ganC-HbJ(P!NR ztieb(N?gP;!H}iqrBn?S7nzxS2Wbzh+8!Lhfw7Zpi;}ME{>p|M%c+pUvS#lRCSW(^lsM?z~e4DV=euRBforEIX#6Nwg!BqI=tEDw`P zt6f3yYVmJ|c|0k1R@M>QkBHA;HC3Nu@EXdh>8Xx3N}*P7L`jzuLBBtyf_;opZ}}(t7*oZzJ9oJh>#jTvoS$rTyKd0vFDt@LR|%ml3(TtgK-#%Ro3#=Cdoo#sAyoA?DPOE{YyKJtHApX z+p43drEyk1{e4;T0T%piuWjNR2k&$=C1pb%2Nxz88}#a)j>SV46#2iFga~f?ag+TV zOQp?HD&);|a8Y{YCoe{O!O}||2r78zZ3X;{6_F>PW0a${U3tBPG4+tM5k2wx@}_iS zHYYf+=F;e1e)}%A*D~UZ>zHNz-mNXM4*hY#is% z1s_?2<}UQ~4QCA_Kidww>*i?hLnp{J)muI~C4tOGA8mM5r07|U@bc7cO@7?EE~fN8 z-Yo1dpM_!?=Re;h>^DxryX#NF{)bM&O2KaqhkT#1B1gc4(cK^BD_zWQt{l4FOx4*1 z1Yz%R_an4o8mXv#bO&8s=KYmR%h?G|5Tejg;3m`uZyfpm%hA&o2_l)qv-d&F5>mQ8 z-C^q;iltzo)Za*nFXYR0m4>C9lD!xDb@!P|=L%vdF&_%0$i@^j0ETh?<_N#s zBHA2;sR%(rjE4(AD4nLfg=Jg5_c-RK#u@~cgWagGM-F~Wazr|H88uP?y+Iu9@OO}z z$cCZ554mt!)!&}}$656LT>77bgWg{C{%>#Z{+<5kcJBW^Oamm}xiG=TqoR!m%VEcy zi@wCkIvMc?Hq^P6oh;k0UZN;!4QRYG*#-bcA|lVE2=gBn@mYq^m`8FbQy|WZnF$7+ zEttSY6eUr()1>>nA!a{-3>1>w|_=Q8tZuTb`)g-yf5oiT7S)@Y{ieBTmAZb9m41H!j{o}V>GRi5KM;Z&EKCv3!gNZD zGXLqHlQ_N6emhvZFbD6sFbAbxKH?0JTSG`R1{3l*Vx;6uYYOq={Ms@WGZ|=|*}gte 
zSvh4b%N#Z+q>NjDknADAB!TkNrQr;Z0_#ySBS(Zho9D5_WOzzoHo!cL!jGW*w(bdb zbQqt>*AVlaakbQ;ue!ee#UOH=ZoH*<@e!ub0GRu5U=?Ao825{Kp1CWFd$o@4t>_Bg z8XkM)(60|iWm0OG*}S6TFqwukT1Cz)4Lg(95-(`q2Rq9SB>?nZZ@<%7UyMDq*OzOl sECT*w0?b0|I_0%%_^P61wXFHy-Ieg&=k9a&`8WCe|0wE;umFGo09>=~P5=M^ diff --git a/bases/br_inep_indicadores_educacionais/code/[dados_atualizacao]_br_inep_indicadores_educacionais.ipynb b/bases/br_inep_indicadores_educacionais/code/[dados_atualizacao]_br_inep_indicadores_educacionais.ipynb index bb909c5d3..98c7913ee 100644 --- a/bases/br_inep_indicadores_educacionais/code/[dados_atualizacao]_br_inep_indicadores_educacionais.ipynb +++ b/bases/br_inep_indicadores_educacionais/code/[dados_atualizacao]_br_inep_indicadores_educacionais.ipynb @@ -1,2659 +1,2659 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "PDraahgvOhMo", "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "outputId": "7eed9265-4309-4bf2-a9b3-305e9c78d778" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", + "Collecting basedosdados==1.6.10b1\n", + " Downloading basedosdados-1.6.10b1-py3-none-any.whl (51 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m51.0/51.0 KB\u001B[0m \u001B[31m1.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: pandas-gbq<0.18.0,>=0.17.4 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (0.17.9)\n", + 
"Collecting Jinja2==3.0.3\n", + " Downloading Jinja2-3.0.3-py3-none-any.whl (133 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m133.6/133.6 KB\u001B[0m \u001B[31m5.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting pyaml==20.4.0\n", + " Downloading pyaml-20.4.0-py2.py3-none-any.whl (17 kB)\n", + "Collecting pyarrow==6.0.0\n", + " Downloading pyarrow-6.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.6 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m25.6/25.6 MB\u001B[0m \u001B[31m19.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting tomlkit==0.7.0\n", + " Downloading tomlkit-0.7.0-py2.py3-none-any.whl (32 kB)\n", + "Collecting ruamel.yaml==0.17.10\n", + " Downloading ruamel.yaml-0.17.10-py3-none-any.whl (108 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m108.4/108.4 KB\u001B[0m \u001B[31m9.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting tqdm==4.50.2\n", + " Downloading tqdm-4.50.2-py2.py3-none-any.whl (70 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m70.9/70.9 KB\u001B[0m \u001B[31m6.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting google-cloud-storage==1.42.3\n", + " Downloading google_cloud_storage-1.42.3-py2.py3-none-any.whl (105 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m106.0/106.0 KB\u001B[0m \u001B[31m8.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting shapely<2.0.0,>=1.6.0\n", + " Downloading Shapely-1.8.5.post1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.1 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.1/2.1 MB\u001B[0m \u001B[31m22.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting loguru<0.7.0,>=0.6.0\n", + 
" Downloading loguru-0.6.0-py3-none-any.whl (58 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m58.3/58.3 KB\u001B[0m \u001B[31m4.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting importlib-metadata<5.0.0,>=4.11.3\n", + " Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: pandas<2.0.0,>=1.3.5 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (1.3.5)\n", + "Requirement already satisfied: toml<0.11.0,>=0.10.2 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (0.10.2)\n", + "Collecting google-cloud-bigquery==2.30.1\n", + " Downloading google_cloud_bigquery-2.30.1-py2.py3-none-any.whl (203 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m204.0/204.0 KB\u001B[0m \u001B[31m8.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting click==8.0.3\n", + " Downloading click-8.0.3-py3-none-any.whl (97 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m97.5/97.5 KB\u001B[0m \u001B[31m7.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting pandavro<2.0.0,>=1.6.0\n", + " Downloading pandavro-1.7.1.tar.gz (8.1 kB)\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Collecting ckanapi==4.6\n", + " Downloading ckanapi-4.6.tar.gz (32 kB)\n", + " Preparing metadata (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + "Collecting google-cloud-bigquery-storage==1.1.0\n", + " Downloading google_cloud_bigquery_storage-1.1.0-py2.py3-none-any.whl (135 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m135.2/135.2 KB\u001B[0m \u001B[31m13.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (57.4.0)\n", + "Collecting docopt\n", + " Downloading docopt-0.6.2.tar.gz (25 kB)\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (2.25.1)\n", + "Requirement already satisfied: python-slugify>=1.0 in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (8.0.0)\n", + "Requirement already satisfied: six<2.0,>=1.9 in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (1.15.0)\n", + "Requirement already satisfied: proto-plus>=1.10.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.22.2)\n", + "Requirement already satisfied: grpcio<2.0dev,>=1.38.1 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.51.1)\n", + "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.4.1)\n", + "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.4.1 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.3.2)\n", + "Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (3.19.6)\n", + "Requirement already satisfied: 
google-api-core[grpc]<3.0.0dev,>=1.29.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.11.0)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (23.0)\n", + "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.8.2)\n", + "Collecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n", + " Downloading google_api_core-1.34.0-py3-none-any.whl (120 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m120.2/120.2 KB\u001B[0m \u001B[31m11.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: google-auth<3.0dev,>=1.25.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (2.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from Jinja2==3.0.3->basedosdados==1.6.10b1) (2.0.1)\n", + "Requirement already satisfied: PyYAML in /usr/local/lib/python3.8/dist-packages (from pyaml==20.4.0->basedosdados==1.6.10b1) (6.0)\n", + "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow==6.0.0->basedosdados==1.6.10b1) (1.21.6)\n", + "Collecting ruamel.yaml.clib>=0.1.2\n", + " Downloading ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (555 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m555.3/555.3 KB\u001B[0m \u001B[31m40.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata<5.0.0,>=4.11.3->basedosdados==1.6.10b1) (3.12.1)\n", + "Requirement already satisfied: pytz>=2017.3 in 
/usr/local/lib/python3.8/dist-packages (from pandas<2.0.0,>=1.3.5->basedosdados==1.6.10b1) (2022.7.1)\n", + "Requirement already satisfied: db-dtypes<2.0.0,>=0.3.1 in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.0.5)\n", + "Requirement already satisfied: pydata-google-auth in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.7.0)\n", + "Requirement already satisfied: google-auth-oauthlib>=0.0.1 in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (0.4.6)\n", + "Collecting fastavro==1.5.1\n", + " Downloading fastavro-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.6/2.6 MB\u001B[0m \u001B[31m68.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /usr/local/lib/python3.8/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.58.0)\n", + "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /usr/local/lib/python3.8/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.48.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (4.9)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (5.3.0)\n", + "Requirement already 
satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.8/dist-packages (from google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.3.1)\n", + "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.8/dist-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.5.0)\n", + "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.8/dist-packages (from python-slugify>=1.0->ckanapi==4.6->basedosdados==1.6.10b1) (1.3)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (4.0.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (2022.12.7)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.8/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (0.4.8)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (3.2.2)\n", + "Building wheels for collected packages: ckanapi, pandavro, docopt\n", + " Building wheel for ckanapi (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + " Created wheel for ckanapi: filename=ckanapi-4.6-py3-none-any.whl size=40701 sha256=1c6049414adf841fb929bba135b92650b10af7deced4991b8235c966c99d9137\n", + " Stored in directory: /root/.cache/pip/wheels/6b/4e/93/b6eda5f801fa21fb69bf74b30fe6cad4d5d9640879478ae85e\n", + " Building wheel for pandavro (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for pandavro: filename=pandavro-1.7.1-py3-none-any.whl size=5688 sha256=1c3bbc63c548ca2cdcb4d249e5e494895aa059d8dc1706393f451fd8295abc1f\n", + " Stored in directory: /root/.cache/pip/wheels/85/cc/15/480a3cfa1a9fc49e2af9bde7437bd6c8c0265f17e61f32672b\n", + " Building wheel for docopt (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13723 sha256=fa264d415ef6ef26779ebabc9b3b731cf609426cb17dc551f7729463b1519c16\n", + " Stored in directory: /root/.cache/pip/wheels/56/ea/58/ead137b087d9e326852a851351d1debf4ada529b6ac0ec4e8c\n", + "Successfully built ckanapi pandavro docopt\n", + "Installing collected packages: docopt, tqdm, tomlkit, shapely, ruamel.yaml.clib, pyarrow, pyaml, loguru, Jinja2, importlib-metadata, fastavro, click, ruamel.yaml, ckanapi, pandavro, google-api-core, google-cloud-storage, google-cloud-bigquery-storage, google-cloud-bigquery, basedosdados\n", + " Attempting uninstall: tqdm\n", + " Found existing installation: tqdm 4.64.1\n", + " Uninstalling tqdm-4.64.1:\n", + " Successfully uninstalled tqdm-4.64.1\n", + " Attempting uninstall: shapely\n", + " Found existing installation: shapely 2.0.1\n", + " Uninstalling shapely-2.0.1:\n", + " Successfully uninstalled shapely-2.0.1\n", + " Attempting uninstall: pyarrow\n", + " Found existing installation: pyarrow 9.0.0\n", + " Uninstalling pyarrow-9.0.0:\n", + " Successfully uninstalled pyarrow-9.0.0\n", + " Attempting uninstall: Jinja2\n", + " Found existing installation: Jinja2 2.11.3\n", + " Uninstalling Jinja2-2.11.3:\n", + " Successfully 
uninstalled Jinja2-2.11.3\n", + " Attempting uninstall: importlib-metadata\n", + " Found existing installation: importlib-metadata 6.0.0\n", + " Uninstalling importlib-metadata-6.0.0:\n", + " Successfully uninstalled importlib-metadata-6.0.0\n", + " Attempting uninstall: click\n", + " Found existing installation: click 7.1.2\n", + " Uninstalling click-7.1.2:\n", + " Successfully uninstalled click-7.1.2\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 2.11.0\n", + " Uninstalling google-api-core-2.11.0:\n", + " Successfully uninstalled google-api-core-2.11.0\n", + " Attempting uninstall: google-cloud-storage\n", + " Found existing installation: google-cloud-storage 2.7.0\n", + " Uninstalling google-cloud-storage-2.7.0:\n", + " Successfully uninstalled google-cloud-storage-2.7.0\n", + " Attempting uninstall: google-cloud-bigquery-storage\n", + " Found existing installation: google-cloud-bigquery-storage 2.18.1\n", + " Uninstalling google-cloud-bigquery-storage-2.18.1:\n", + " Successfully uninstalled google-cloud-bigquery-storage-2.18.1\n", + " Attempting uninstall: google-cloud-bigquery\n", + " Found existing installation: google-cloud-bigquery 3.4.2\n", + " Uninstalling google-cloud-bigquery-3.4.2:\n", + " Successfully uninstalled google-cloud-bigquery-3.4.2\n", + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "notebook 5.7.16 requires jinja2<=3.0.0, but you have jinja2 3.0.3 which is incompatible.\n", + "flask 1.1.4 requires click<8.0,>=5.1, but you have click 8.0.3 which is incompatible.\n", + "flask 1.1.4 requires Jinja2<3.0,>=2.10.1, but you have jinja2 3.0.3 which is incompatible.\u001B[0m\u001B[31m\n", + "\u001B[0mSuccessfully installed Jinja2-3.0.3 basedosdados-1.6.10b1 ckanapi-4.6 click-8.0.3 docopt-0.6.2 fastavro-1.5.1 google-api-core-1.34.0 google-cloud-bigquery-2.30.1 google-cloud-bigquery-storage-1.1.0 google-cloud-storage-1.42.3 importlib-metadata-4.13.0 loguru-0.6.0 pandavro-1.7.1 pyaml-20.4.0 pyarrow-6.0.0 ruamel.yaml-0.17.10 ruamel.yaml.clib-0.2.7 shapely-1.8.5.post1 tomlkit-0.7.0 tqdm-4.50.2\n" + ] } + ], + "source": [ + "!pip install basedosdados==1.6.10b1" + ] }, - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "PDraahgvOhMo", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "7eed9265-4309-4bf2-a9b3-305e9c78d778" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting basedosdados==1.6.10b1\n", - " Downloading basedosdados-1.6.10b1-py3-none-any.whl (51 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.0/51.0 KB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas-gbq<0.18.0,>=0.17.4 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (0.17.9)\n", - "Collecting Jinja2==3.0.3\n", - " Downloading Jinja2-3.0.3-py3-none-any.whl (133 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.6/133.6 KB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting 
pyaml==20.4.0\n", - " Downloading pyaml-20.4.0-py2.py3-none-any.whl (17 kB)\n", - "Collecting pyarrow==6.0.0\n", - " Downloading pyarrow-6.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m25.6/25.6 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting tomlkit==0.7.0\n", - " Downloading tomlkit-0.7.0-py2.py3-none-any.whl (32 kB)\n", - "Collecting ruamel.yaml==0.17.10\n", - " Downloading ruamel.yaml-0.17.10-py3-none-any.whl (108 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.4/108.4 KB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting tqdm==4.50.2\n", - " Downloading tqdm-4.50.2-py2.py3-none-any.whl (70 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.9/70.9 KB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting google-cloud-storage==1.42.3\n", - " Downloading google_cloud_storage-1.42.3-py2.py3-none-any.whl (105 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.0/106.0 KB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting shapely<2.0.0,>=1.6.0\n", - " Downloading Shapely-1.8.5.post1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting loguru<0.7.0,>=0.6.0\n", - " Downloading loguru-0.6.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 KB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting 
importlib-metadata<5.0.0,>=4.11.3\n", - " Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)\n", - "Requirement already satisfied: pandas<2.0.0,>=1.3.5 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (1.3.5)\n", - "Requirement already satisfied: toml<0.11.0,>=0.10.2 in /usr/local/lib/python3.8/dist-packages (from basedosdados==1.6.10b1) (0.10.2)\n", - "Collecting google-cloud-bigquery==2.30.1\n", - " Downloading google_cloud_bigquery-2.30.1-py2.py3-none-any.whl (203 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m204.0/204.0 KB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting click==8.0.3\n", - " Downloading click-8.0.3-py3-none-any.whl (97 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.5/97.5 KB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pandavro<2.0.0,>=1.6.0\n", - " Downloading pandavro-1.7.1.tar.gz (8.1 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting ckanapi==4.6\n", - " Downloading ckanapi-4.6.tar.gz (32 kB)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "Collecting google-cloud-bigquery-storage==1.1.0\n", - " Downloading google_cloud_bigquery_storage-1.1.0-py2.py3-none-any.whl (135 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.2/135.2 KB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (57.4.0)\n", - "Collecting docopt\n", - " Downloading docopt-0.6.2.tar.gz (25 kB)\n", - " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (2.25.1)\n", - "Requirement already satisfied: python-slugify>=1.0 in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (8.0.0)\n", - "Requirement already satisfied: six<2.0,>=1.9 in /usr/local/lib/python3.8/dist-packages (from ckanapi==4.6->basedosdados==1.6.10b1) (1.15.0)\n", - "Requirement already satisfied: proto-plus>=1.10.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.22.2)\n", - "Requirement already satisfied: grpcio<2.0dev,>=1.38.1 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.51.1)\n", - "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.4.1)\n", - "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.4.1 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.3.2)\n", - "Requirement already satisfied: protobuf>=3.12.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (3.19.6)\n", - "Requirement already satisfied: google-api-core[grpc]<3.0.0dev,>=1.29.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.11.0)\n", - "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (23.0)\n", - "Requirement already satisfied: python-dateutil<3.0dev,>=2.7.2 in /usr/local/lib/python3.8/dist-packages (from google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (2.8.2)\n", - "Collecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n", - " Downloading google_api_core-1.34.0-py3-none-any.whl 
(120 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.2/120.2 KB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: google-auth<3.0dev,>=1.25.0 in /usr/local/lib/python3.8/dist-packages (from google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (2.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from Jinja2==3.0.3->basedosdados==1.6.10b1) (2.0.1)\n", - "Requirement already satisfied: PyYAML in /usr/local/lib/python3.8/dist-packages (from pyaml==20.4.0->basedosdados==1.6.10b1) (6.0)\n", - "Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.8/dist-packages (from pyarrow==6.0.0->basedosdados==1.6.10b1) (1.21.6)\n", - "Collecting ruamel.yaml.clib>=0.1.2\n", - " Downloading ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (555 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m555.3/555.3 KB\u001b[0m \u001b[31m40.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata<5.0.0,>=4.11.3->basedosdados==1.6.10b1) (3.12.1)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<2.0.0,>=1.3.5->basedosdados==1.6.10b1) (2022.7.1)\n", - "Requirement already satisfied: db-dtypes<2.0.0,>=0.3.1 in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.0.5)\n", - "Requirement already satisfied: pydata-google-auth in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.7.0)\n", - "Requirement already satisfied: google-auth-oauthlib>=0.0.1 in /usr/local/lib/python3.8/dist-packages (from pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (0.4.6)\n", - 
"Collecting fastavro==1.5.1\n", - " Downloading fastavro-1.5.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m68.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /usr/local/lib/python3.8/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.58.0)\n", - "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /usr/local/lib/python3.8/dist-packages (from google-api-core[grpc]<3.0.0dev,>=1.29.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.48.2)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (0.2.8)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (4.9)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (5.3.0)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.8/dist-packages (from google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (1.3.1)\n", - "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /usr/local/lib/python3.8/dist-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery==2.30.1->basedosdados==1.6.10b1) (1.5.0)\n", - "Requirement already satisfied: text-unidecode>=1.3 in /usr/local/lib/python3.8/dist-packages (from python-slugify>=1.0->ckanapi==4.6->basedosdados==1.6.10b1) (1.3)\n", - "Requirement already satisfied: chardet<5,>=3.0.2 in 
/usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (4.0.0)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (2.10)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (1.24.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->ckanapi==4.6->basedosdados==1.6.10b1) (2022.12.7)\n", - "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.8/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-cloud-storage==1.42.3->basedosdados==1.6.10b1) (0.4.8)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.0.1->pandas-gbq<0.18.0,>=0.17.4->basedosdados==1.6.10b1) (3.2.2)\n", - "Building wheels for collected packages: ckanapi, pandavro, docopt\n", - " Building wheel for ckanapi (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for ckanapi: filename=ckanapi-4.6-py3-none-any.whl size=40701 sha256=1c6049414adf841fb929bba135b92650b10af7deced4991b8235c966c99d9137\n", - " Stored in directory: /root/.cache/pip/wheels/6b/4e/93/b6eda5f801fa21fb69bf74b30fe6cad4d5d9640879478ae85e\n", - " Building wheel for pandavro (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pandavro: filename=pandavro-1.7.1-py3-none-any.whl size=5688 sha256=1c3bbc63c548ca2cdcb4d249e5e494895aa059d8dc1706393f451fd8295abc1f\n", - " Stored in directory: /root/.cache/pip/wheels/85/cc/15/480a3cfa1a9fc49e2af9bde7437bd6c8c0265f17e61f32672b\n", - " Building wheel for docopt (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13723 sha256=fa264d415ef6ef26779ebabc9b3b731cf609426cb17dc551f7729463b1519c16\n", - " Stored in directory: /root/.cache/pip/wheels/56/ea/58/ead137b087d9e326852a851351d1debf4ada529b6ac0ec4e8c\n", - "Successfully built ckanapi pandavro docopt\n", - "Installing collected packages: docopt, tqdm, tomlkit, shapely, ruamel.yaml.clib, pyarrow, pyaml, loguru, Jinja2, importlib-metadata, fastavro, click, ruamel.yaml, ckanapi, pandavro, google-api-core, google-cloud-storage, google-cloud-bigquery-storage, google-cloud-bigquery, basedosdados\n", - " Attempting uninstall: tqdm\n", - " Found existing installation: tqdm 4.64.1\n", - " Uninstalling tqdm-4.64.1:\n", - " Successfully uninstalled tqdm-4.64.1\n", - " Attempting uninstall: shapely\n", - " Found existing installation: shapely 2.0.1\n", - " Uninstalling shapely-2.0.1:\n", - " Successfully uninstalled shapely-2.0.1\n", - " Attempting uninstall: pyarrow\n", - " Found existing installation: pyarrow 9.0.0\n", - " Uninstalling pyarrow-9.0.0:\n", - " Successfully uninstalled pyarrow-9.0.0\n", - " Attempting uninstall: Jinja2\n", - " Found existing installation: Jinja2 2.11.3\n", - " Uninstalling Jinja2-2.11.3:\n", - " Successfully uninstalled Jinja2-2.11.3\n", - " Attempting uninstall: importlib-metadata\n", - " Found existing installation: importlib-metadata 6.0.0\n", - " Uninstalling importlib-metadata-6.0.0:\n", - " Successfully uninstalled importlib-metadata-6.0.0\n", - " Attempting uninstall: click\n", - " Found existing installation: click 7.1.2\n", - " Uninstalling click-7.1.2:\n", - " Successfully uninstalled click-7.1.2\n", - " Attempting uninstall: google-api-core\n", - " Found existing installation: google-api-core 2.11.0\n", - " Uninstalling google-api-core-2.11.0:\n", - " Successfully uninstalled google-api-core-2.11.0\n", - " Attempting uninstall: google-cloud-storage\n", - " Found existing installation: 
google-cloud-storage 2.7.0\n", - " Uninstalling google-cloud-storage-2.7.0:\n", - " Successfully uninstalled google-cloud-storage-2.7.0\n", - " Attempting uninstall: google-cloud-bigquery-storage\n", - " Found existing installation: google-cloud-bigquery-storage 2.18.1\n", - " Uninstalling google-cloud-bigquery-storage-2.18.1:\n", - " Successfully uninstalled google-cloud-bigquery-storage-2.18.1\n", - " Attempting uninstall: google-cloud-bigquery\n", - " Found existing installation: google-cloud-bigquery 3.4.2\n", - " Uninstalling google-cloud-bigquery-3.4.2:\n", - " Successfully uninstalled google-cloud-bigquery-3.4.2\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "notebook 5.7.16 requires jinja2<=3.0.0, but you have jinja2 3.0.3 which is incompatible.\n", - "flask 1.1.4 requires click<8.0,>=5.1, but you have click 8.0.3 which is incompatible.\n", - "flask 1.1.4 requires Jinja2<3.0,>=2.10.1, but you have jinja2 3.0.3 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed Jinja2-3.0.3 basedosdados-1.6.10b1 ckanapi-4.6 click-8.0.3 docopt-0.6.2 fastavro-1.5.1 google-api-core-1.34.0 google-cloud-bigquery-2.30.1 google-cloud-bigquery-storage-1.1.0 google-cloud-storage-1.42.3 importlib-metadata-4.13.0 loguru-0.6.0 pandavro-1.7.1 pyaml-20.4.0 pyarrow-6.0.0 ruamel.yaml-0.17.10 ruamel.yaml.clib-0.2.7 shapely-1.8.5.post1 tomlkit-0.7.0 tqdm-4.50.2\n" - ] - } - ], - "source": [ - "!pip install basedosdados==1.6.10b1" - ] - }, - { - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "pd.set_option(\"display.max_columns\", None)\n", - "import basedosdados as bd\n", - "import zipfile\n", - "from zipfile import ZipFile" - ], - "metadata": { - "id": "dSbj4uDkOu0n" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Brasil" - ], - "metadata": { - 
"id": "H2KTESTddnNE" - } - }, - { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_BRASIL_REGIOES_UF.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_BRASIL_REGIOES_UFS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_BRASIL_REGIOES_UFS.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "8UuaE8D1ezML", - "outputId": "640eaa3c-c4b6-46af-a234-61e65d6f1b1e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-02-11 12:42:18-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_BRASIL_REGIOES_UF.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 279399 (273K) [application/zip]\n", - "Saving to: ‘AFD_2022_BRASIL_REGIOES_UF.zip’\n", - "\n", - "AFD_2022_BRASIL_REG 100%[===================>] 272.85K 347KB/s in 0.8s \n", - "\n", - "2023-02-11 12:42:20 (347 KB/s) - ‘AFD_2022_BRASIL_REGIOES_UF.zip’ saved [279399/279399]\n", - "\n", - "--2023-02-11 12:42:20-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 91142 (89K) [application/zip]\n", - "Saving to: ‘ICG_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "ICG_2022_BRASIL_REG 100%[===================>] 89.01K 189KB/s in 0.5s \n", - "\n", - "2023-02-11 12:42:21 (189 KB/s) - ‘ICG_2022_BRASIL_REGIOES_UFS.zip’ saved [91142/91142]\n", - "\n", - "--2023-02-11 12:42:21-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 217268 (212K) [application/zip]\n", - "Saving to: ‘IED_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "IED_2022_BRASIL_REG 100%[===================>] 212.18K 193KB/s in 1.1s \n", - "\n", - "2023-02-11 12:42:23 (193 KB/s) - ‘IED_2022_BRASIL_REGIOES_UFS.zip’ saved [217268/217268]\n", - "\n", - "--2023-02-11 12:42:23-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 198113 (193K) [application/zip]\n", - "Saving to: ‘ATU_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "ATU_2022_BRASIL_REG 100%[===================>] 193.47K 308KB/s in 0.6s \n", - "\n", - "2023-02-11 12:42:24 (308 KB/s) - ‘ATU_2022_BRASIL_REGIOES_UFS.zip’ saved [198113/198113]\n", - "\n", - "--2023-02-11 12:42:25-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 84853 (83K) [application/zip]\n", - "Saving to: ‘HAD_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "HAD_2022_BRASIL_REG 100%[===================>] 82.86K 233KB/s in 0.4s \n", - "\n", - "2023-02-11 12:42:26 (233 KB/s) - ‘HAD_2022_BRASIL_REGIOES_UFS.zip’ saved [84853/84853]\n", - "\n", - "--2023-02-11 12:42:26-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 118154 (115K) [application/zip]\n", - "Saving to: ‘DSU_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "DSU_2022_BRASIL_REG 100%[===================>] 115.38K 184KB/s in 0.6s \n", - "\n", - "2023-02-11 12:42:27 (184 KB/s) - ‘DSU_2022_BRASIL_REGIOES_UFS.zip’ saved [118154/118154]\n", - "\n", - "--2023-02-11 12:42:27-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 81096 (79K) [application/zip]\n", - "Saving to: ‘IRD_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "IRD_2022_BRASIL_REG 100%[===================>] 79.20K 252KB/s in 0.3s \n", - "\n", - "2023-02-11 12:42:28 (252 KB/s) - ‘IRD_2022_BRASIL_REGIOES_UFS.zip’ saved [81096/81096]\n", - "\n", - "--2023-02-11 12:42:28-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_BRASIL_REGIOES_UFS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 172134 (168K) [application/zip]\n", - "Saving to: ‘TDI_2022_BRASIL_REGIOES_UFS.zip’\n", - "\n", - "TDI_2022_BRASIL_REG 100%[===================>] 168.10K 267KB/s in 0.6s \n", - "\n", - "2023-02-11 12:42:30 (267 KB/s) - ‘TDI_2022_BRASIL_REGIOES_UFS.zip’ saved [172134/172134]\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "ordem = ['localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", - " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", - " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", - " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", - " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", - " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", - " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", - " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", - " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", - " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", - " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", - " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", - " 'taxa_aprovacao_em_4_ano', 
'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", - " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", - " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", - " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", - " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", - " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", - " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", - " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", - " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", - " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", - " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", - " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", - " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", - " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", - " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", - " 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", - " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' ,'afd_ef_anos_iniciais_grupo_5',\n", - " 'afd_ef_anos_finais_grupo_1', 
'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", - " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", - " 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", - " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", - " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", - " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", - " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", - " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", - " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", - " 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", - " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", - " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", - " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" - ], - "metadata": { - "id": "ZxwfRbG6iOhJ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "rename_afd = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 
'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", - " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd = afd.replace('--', '')\n", - " afd = afd[afd.UNIDGEO.isin(['Brasil'])]\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 
'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu = atu[atu.UNIDGEO.isin(['Brasil'])]\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu = dsu[dsu.UNIDGEO.isin(['Brasil'])]\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 
'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had = had[had.UNIDGEO.isin(['Brasil'])]\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", - " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", - "\n", - "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", - " icg = icg[icg.UNIDGEO.isin(['Brasil'])]\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 
'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied = ied[ied.UNIDGEO.isin(['Brasil'])]\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", - " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", - "\n", - "with 
ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", - " ird = ird[ird.UNIDGEO.isin(['Brasil'])]\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tdi = tdi[tdi.UNIDGEO.isin(['Brasil'])]\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "# rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - "# '4_CAT_FUN_01':'tnr_ef_1_ano', 
'4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tnr = tnr[tnr.UNIDGEO.isin(['Brasil'])]\n", - "# tnr = tnr.replace('--', '')\n", - "# tnr.rename(columns=rename_tnr, inplace=True)\n", - "# tnr[index] = tnr[index].astype(str)\n", - "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - "# 
'2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tx = tx[tx.UNIDGEO.isin(['Brasil'])]\n", - "# tx = tx.replace('--', '')\n", - "# tx.rename(columns=rename_tx, inplace=True)\n", - "# tx['localizacao'] = 
tx['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'UNIDGEO',\t'localizacao',\t'rede'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", - "\n", - "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", - "df_list = df.columns\n", - "df2 = list(set(ordem) - set(df_list))\n", - "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", - "#ORDENA TODAS AS VARIÁVEIS \n", - "df = df[ordem]\n", - "\n", - "df.to_csv('/content/brasil.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "P_lweYEYiZWx" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "query = ''' \n", - "SELECT * FROM `basedosdados-dev.br_inep_indicadores_educacionais.brasil` WHERE ano = 2021\n", - "'''\n", - "\n", - "df_br = bd.read_sql(query, billing_project_id='input-bd')\n", - "\n", - "index = ['localizacao', 'rede']\n", - "drop = ['taxa_aprovacao_ef', 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 
'taxa_aprovacao_ef_9_ano', 'taxa_aprovacao_em',\n", - " 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano', 'taxa_aprovacao_em_4_ano',\n", - " 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais', 'taxa_reprovacao_ef_anos_finais',\n", - " 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano', 'taxa_reprovacao_ef_4_ano',\n", - " 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano', 'taxa_reprovacao_ef_8_ano',\n", - " 'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano', 'taxa_reprovacao_em_2_ano',\n", - " 'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano', 'taxa_reprovacao_em_nao_seriado', 'taxa_abandono_ef',\n", - " 'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', 'taxa_abandono_ef_2_ano',\n", - " 'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', 'taxa_abandono_ef_6_ano',\n", - " 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano', 'taxa_abandono_ef_9_ano', 'taxa_abandono_em', 'taxa_abandono_em_1_ano',\n", - " 'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado',\n", - " 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano',\n", - " 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em', 'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano',\n", - " 'tnr_em_4_ano', 'tnr_em_nao_seriado']\n", - "df_br.drop(drop, axis=1, inplace=True)\n", - "df_br = pd.merge(df_br, tnr, how='left', left_on=index, right_on=index)\n", - "df_br = pd.merge(df_br, tx, how='left', left_on=index, right_on=index)\n", - "df_br = df_br[ordem]\n", - "df_br.to_csv('/content/brasil.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "ZF1iUQgZvEK7" - }, - "execution_count": null, - "outputs": [] - }, - { - 
"cell_type": "markdown", - "source": [ - "### Região" - ], - "metadata": { - "id": "0yfI7W9wjCf9" - } - }, - { - "cell_type": "code", - "source": [ - "ordem = ['regiao', 'localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", - " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", - " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", - " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", - " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", - " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", - " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", - " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", - " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", - " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", - " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", - " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", - " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", - " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", - " 
'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", - " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", - " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", - " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", - " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", - " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", - " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", - " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", - " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", - " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", - " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", - " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", - " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", - " 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", - " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' ,'afd_ef_anos_iniciais_grupo_5',\n", - " 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", - " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", - " 'afd_em_grupo_4', 'afd_em_grupo_5', 
'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", - " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", - " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", - " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", - " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", - " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", - " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", - " 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", - " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", - " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", - " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" - ], - "metadata": { - "id": "UIzF2PxrxSX3" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "regiao = ['Norte', 'Nordeste', 'Sudeste', 'Sul', 'Centro-Oeste']\n", - "rename_afd = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 
'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", - " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd = afd.replace('--', '')\n", - " afd = afd[afd.UNIDGEO.isin(regiao)]\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 
'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu = atu[atu.UNIDGEO.isin(regiao)]\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu = dsu[dsu.UNIDGEO.isin(regiao)]\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 
'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had = had[had.UNIDGEO.isin(regiao)]\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", - " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", - "\n", - "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", - " icg = icg[icg.UNIDGEO.isin(regiao)]\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 
'publica')\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied = ied[ied.UNIDGEO.isin(regiao)]\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", - " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", - "\n", - "with 
ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", - " ird = ird[ird.UNIDGEO.isin(regiao)]\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tdi = tdi[tdi.UNIDGEO.isin(regiao)]\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "# rename_tnr = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - "# '4_CAT_FUN_01':'tnr_ef_1_ano', 
'4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tnr = tnr[tnr.UNIDGEO.isin(regiao)]\n", - "# tnr = tnr.replace('--', '')\n", - "# tnr.rename(columns=rename_tnr, inplace=True)\n", - "# tnr[index] = tnr[index].astype(str)\n", - "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "# rename_tx = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - "# 
'2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tx = tx[tx.UNIDGEO.isin(regiao)]\n", - "# tx = tx.replace('--', '')\n", - "# tx.rename(columns=rename_tx, inplace=True)\n", - "# tx['localizacao'] = 
tx['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'regiao',\t'localizacao',\t'rede'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", - "\n", - "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", - "df_list = df.columns\n", - "df2 = list(set(ordem) - set(df_list))\n", - "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", - "#ORDENA TODAS AS VARIÁVEIS \n", - "df = df[ordem]\n", - "\n", - "df.to_csv('/content/regiao.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "RvcgO46WxnQs" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### UF" - ], - "metadata": { - "id": "hqhcoua0ru0n" - } - }, - { - "cell_type": "code", - "source": [ - "query = 'SELECT sigla, nome FROM basedosdados.br_bd_diretorios_brasil.uf'\n", - "sigla = bd.read_sql(query, billing_project_id='input-bd')\n", - "sigla.rename(columns={'sigla':'sigla_uf'}, inplace=True)\n", - "nome_ufs = sigla['nome'].tolist()\n", - "ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n", - " 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato 
Gross':'MT', \n", - " 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', \n", - " 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}" - ], - "metadata": { - "id": "ovuiiT7AtIjJ", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "f5d2bde6-54d5-4d2f-9f3c-6891e74f6bf4" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Downloading: 100%|██████████| 27/27 [00:00<00:00, 150.63rows/s]\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "ordem = ['sigla_uf', 'localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", - " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", - " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", - " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", - " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", - " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", - " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", - " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", - " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", - " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", - " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 
'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", - " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", - " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", - " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", - " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", - " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", - " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", - " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", - " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", - " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", - " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", - " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", - " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", - " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", - " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", - " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", - " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 
'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", - " 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", - " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' ,'afd_ef_anos_iniciais_grupo_5',\n", - " 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", - " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", - " 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", - " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", - " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", - " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", - " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", - " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", - " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", - " 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", - " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", - " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", - " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" - ], - "metadata": { - "id": "RgpH_CmKzp2T" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "rename_afd = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 
'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", - " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd = afd.replace('--', '')\n", - " afd = afd[afd.UNIDGEO.isin(nome_ufs)] \n", - " afd['sigla_uf'] = afd['UNIDGEO'].map(ufs)\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 
'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu = atu[atu.UNIDGEO.isin(nome_ufs)]\n", - " atu['sigla_uf'] = atu['UNIDGEO'].map(ufs)\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu = 
dsu[dsu.UNIDGEO.isin(nome_ufs)]\n", - " dsu['sigla_uf'] = dsu['UNIDGEO'].map(ufs)\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had = had[had.UNIDGEO.isin(nome_ufs)]\n", - " had['sigla_uf'] = had['UNIDGEO'].map(ufs)\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", - " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 
'EDU_BAS_CAT_6':'icg_nivel_6'}\n", - "\n", - "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", - " icg = icg[icg.UNIDGEO.isin(nome_ufs)]\n", - " icg['sigla_uf'] = icg['UNIDGEO'].map(ufs)\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied = ied[ied.UNIDGEO.isin(nome_ufs)]\n", - " ied['sigla_uf'] = 
ied['UNIDGEO'].map(ufs)\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", - " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", - "\n", - "with ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", - " ird = ird[ird.UNIDGEO.isin(nome_ufs)]\n", - " ird['sigla_uf'] = ird['UNIDGEO'].map(ufs)\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", - " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", - " tdi = pd.read_excel(f, 
skiprows=8, dtype=str)\n", - " tdi = tdi[tdi.UNIDGEO.isin(nome_ufs)]\n", - " tdi['sigla_uf'] = tdi['UNIDGEO'].map(ufs)\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "# rename_tnr = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tnr = tnr[tnr.UNIDGEO.isin(nome_ufs)]\n", - "# tnr['sigla_uf'] = tnr['UNIDGEO'].map(ufs)\n", - "# tnr = tnr.replace('--', '')\n", - "# tnr.rename(columns=rename_tnr, inplace=True)\n", - "# tnr[index] = tnr[index].astype(str)\n", - "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "# rename_tx = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - "# 
'1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', 
'3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tx = tx[tx.UNIDGEO.isin(nome_ufs)]\n", - "# tx['sigla_uf'] = tx['UNIDGEO'].map(ufs)\n", - "# tx = tx.replace('--', '')\n", - "# tx.rename(columns=rename_tx, inplace=True)\n", - "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'UNIDGEO',\t'sigla_uf', 'localizacao',\t'rede'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", - "\n", - "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", - "df_list = df.columns\n", - "df2 = list(set(ordem) - set(df_list))\n", - "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", - "#ORDENA TODAS AS VARIÁVEIS \n", - "df = df[ordem]\n", - "\n", - "df.to_csv('/content/uf.csv', index=False, encoding='utf-8', na_rep='')" - 
], - "metadata": { - "id": "oBKWb3F6z_7E" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Município" - ], - "metadata": { - "id": "GuTAdm0WqxK3" - } - }, - { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/AFD_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/ICG_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/IED_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/ATU_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/HAD_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/DSU_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/IRD_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/TDI_2021_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_municipios_2021.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_municipios_2021.zip" - ], - "metadata": { - "id": "A0OiYmdYrBHb" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_MUNICIPIOS.zip\n", - "!wget 
--no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_MUNICIPIOS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_MUNICIPIOS.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "k8-Pm8Vz7OaA", - "outputId": "79547f49-52e5-42e7-b736-817cda78e1ba" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-02-11 14:43:00-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 23472229 (22M) [application/zip]\n", - "Saving to: ‘AFD_2022_MUNICIPIOS.zip’\n", - "\n", - "AFD_2022_MUNICIPIOS 100%[===================>] 22.38M 690KB/s in 35s \n", - "\n", - "2023-02-11 14:43:36 (660 KB/s) - ‘AFD_2022_MUNICIPIOS.zip’ saved [23472229/23472229]\n", - "\n", - "--2023-02-11 14:43:36-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 6890745 (6.6M) [application/zip]\n", - "Saving to: ‘ICG_2022_MUNICIPIOS.zip’\n", - "\n", - "ICG_2022_MUNICIPIOS 100%[===================>] 6.57M 711KB/s in 10s \n", - "\n", - "2023-02-11 14:43:47 (655 KB/s) - ‘ICG_2022_MUNICIPIOS.zip’ saved [6890745/6890745]\n", - "\n", - "--2023-02-11 14:43:47-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 18534006 (18M) [application/zip]\n", - "Saving to: ‘IED_2022_MUNICIPIOS.zip’\n", - "\n", - "IED_2022_MUNICIPIOS 100%[===================>] 17.67M 705KB/s in 27s \n", - "\n", - "2023-02-11 14:44:15 (673 KB/s) - ‘IED_2022_MUNICIPIOS.zip’ saved [18534006/18534006]\n", - "\n", - "--2023-02-11 14:44:15-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 18120878 (17M) [application/zip]\n", - "Saving to: ‘ATU_2022_MUNICIPIOS.zip’\n", - "\n", - "ATU_2022_MUNICIPIOS 100%[===================>] 17.28M 689KB/s in 26s \n", - "\n", - "2023-02-11 14:44:42 (669 KB/s) - ‘ATU_2022_MUNICIPIOS.zip’ saved [18120878/18120878]\n", - "\n", - "--2023-02-11 14:44:43-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 14552148 (14M) [application/zip]\n", - "Saving to: ‘HAD_2022_MUNICIPIOS.zip’\n", - "\n", - "HAD_2022_MUNICIPIOS 100%[===================>] 13.88M 677KB/s in 22s \n", - "\n", - "2023-02-11 14:45:05 (650 KB/s) - ‘HAD_2022_MUNICIPIOS.zip’ saved [14552148/14552148]\n", - "\n", - "--2023-02-11 14:45:05-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 11135907 (11M) [application/zip]\n", - "Saving to: ‘DSU_2022_MUNICIPIOS.zip’\n", - "\n", - "DSU_2022_MUNICIPIOS 100%[===================>] 10.62M 719KB/s in 16s \n", - "\n", - "2023-02-11 14:45:23 (674 KB/s) - ‘DSU_2022_MUNICIPIOS.zip’ saved [11135907/11135907]\n", - "\n", - "--2023-02-11 14:45:23-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 6044184 (5.8M) [application/zip]\n", - "Saving to: ‘IRD_2022_MUNICIPIOS.zip’\n", - "\n", - "IRD_2022_MUNICIPIOS 100%[===================>] 5.76M 690KB/s in 9.3s \n", - "\n", - "2023-02-11 14:45:33 (635 KB/s) - ‘IRD_2022_MUNICIPIOS.zip’ saved [6044184/6044184]\n", - "\n", - "--2023-02-11 14:45:33-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_MUNICIPIOS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 14627876 (14M) [application/zip]\n", - "Saving to: ‘TDI_2022_MUNICIPIOS.zip’\n", - "\n", - "TDI_2022_MUNICIPIOS 100%[===================>] 13.95M 697KB/s in 22s \n", - "\n", - "2023-02-11 14:45:56 (654 KB/s) - ‘TDI_2022_MUNICIPIOS.zip’ saved [14627876/14627876]\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "ordem = ['id_municipio',\n", - "'localizacao',\n", - "'rede',\n", - "'atu_ei',\n", - "'atu_ei_creche',\n", - "'atu_ei_pre_escola',\n", - "'atu_ef',\n", - "'atu_ef_anos_iniciais',\n", - "'atu_ef_anos_finais',\n", - "'atu_ef_1_ano',\n", - "'atu_ef_2_ano',\n", - "'atu_ef_3_ano',\n", - "'atu_ef_4_ano',\n", - "'atu_ef_5_ano',\n", - "'atu_ef_6_ano',\n", - "'atu_ef_7_ano',\n", - "'atu_ef_8_ano',\n", - "'atu_ef_9_ano',\n", - "'atu_ef_turmas_unif_multi_fluxo',\n", - "'atu_em',\n", - "'atu_em_1_ano',\n", - "'atu_em_2_ano',\n", - "'atu_em_3_ano',\n", - "'atu_em_4_ano',\n", - "'atu_em_nao_seriado',\n", - "'had_ei',\n", - "'had_ei_creche',\n", - "'had_ei_pre_escola',\n", - "'had_ef',\n", - "'had_ef_anos_iniciais',\n", - "'had_ef_anos_finais',\n", - "'had_ef_1_ano',\n", - 
"'had_ef_2_ano',\n", - "'had_ef_3_ano',\n", - "'had_ef_4_ano',\n", - "'had_ef_5_ano',\n", - "'had_ef_6_ano',\n", - "'had_ef_7_ano',\n", - "'had_ef_8_ano',\n", - "'had_ef_9_ano',\n", - "'had_em',\n", - "'had_em_1_ano',\n", - "'had_em_2_ano',\n", - "'had_em_3_ano',\n", - "'had_em_4_ano',\n", - "'had_em_nao_seriado',\n", - "'tdi_ef',\n", - "'tdi_ef_anos_iniciais',\n", - "'tdi_ef_anos_finais',\n", - "'tdi_ef_1_ano',\n", - "'tdi_ef_2_ano',\n", - "'tdi_ef_3_ano',\n", - "'tdi_ef_4_ano',\n", - "'tdi_ef_5_ano',\n", - "'tdi_ef_6_ano',\n", - "'tdi_ef_7_ano',\n", - "'tdi_ef_8_ano',\n", - "'tdi_ef_9_ano',\n", - "'tdi_em',\n", - "'tdi_em_1_ano',\n", - "'tdi_em_2_ano',\n", - "'tdi_em_3_ano',\n", - "'tdi_em_4_ano',\n", - "'taxa_aprovacao_ef',\n", - "'taxa_aprovacao_ef_anos_iniciais',\n", - "'taxa_aprovacao_ef_anos_finais',\n", - "'taxa_aprovacao_ef_1_ano',\n", - "'taxa_aprovacao_ef_2_ano',\n", - "'taxa_aprovacao_ef_3_ano',\n", - "'taxa_aprovacao_ef_4_ano',\n", - "'taxa_aprovacao_ef_5_ano',\n", - "'taxa_aprovacao_ef_6_ano',\n", - "'taxa_aprovacao_ef_7_ano',\n", - "'taxa_aprovacao_ef_8_ano',\n", - "'taxa_aprovacao_ef_9_ano',\n", - "'taxa_aprovacao_em',\n", - "'taxa_aprovacao_em_1_ano',\n", - "'taxa_aprovacao_em_2_ano',\n", - "'taxa_aprovacao_em_3_ano',\n", - "'taxa_aprovacao_em_4_ano',\n", - "'taxa_aprovacao_em_nao_seriado',\n", - "'taxa_reprovacao_ef',\n", - "'taxa_reprovacao_ef_anos_iniciais',\n", - "'taxa_reprovacao_ef_anos_finais',\n", - "'taxa_reprovacao_ef_1_ano',\n", - "'taxa_reprovacao_ef_2_ano',\n", - "'taxa_reprovacao_ef_3_ano',\n", - "'taxa_reprovacao_ef_4_ano',\n", - "'taxa_reprovacao_ef_5_ano',\n", - "'taxa_reprovacao_ef_6_ano',\n", - "'taxa_reprovacao_ef_7_ano',\n", - "'taxa_reprovacao_ef_8_ano',\n", - "'taxa_reprovacao_ef_9_ano',\n", - "'taxa_reprovacao_em',\n", - "'taxa_reprovacao_em_1_ano',\n", - "'taxa_reprovacao_em_2_ano',\n", - "'taxa_reprovacao_em_3_ano',\n", - "'taxa_reprovacao_em_4_ano',\n", - "'taxa_reprovacao_em_nao_seriado',\n", - "'taxa_abandono_ef',\n", - 
"'taxa_abandono_ef_anos_iniciais',\n", - "'taxa_abandono_ef_anos_finais',\n", - "'taxa_abandono_ef_1_ano',\n", - "'taxa_abandono_ef_2_ano',\n", - "'taxa_abandono_ef_3_ano',\n", - "'taxa_abandono_ef_4_ano',\n", - "'taxa_abandono_ef_5_ano',\n", - "'taxa_abandono_ef_6_ano',\n", - "'taxa_abandono_ef_7_ano',\n", - "'taxa_abandono_ef_8_ano',\n", - "'taxa_abandono_ef_9_ano',\n", - "'taxa_abandono_em',\n", - "'taxa_abandono_em_1_ano',\n", - "'taxa_abandono_em_2_ano',\n", - "'taxa_abandono_em_3_ano',\n", - "'taxa_abandono_em_4_ano',\n", - "'taxa_abandono_em_nao_seriado',\n", - "'tnr_ef',\n", - "'tnr_ef_anos_iniciais',\n", - "'tnr_ef_anos_finais',\n", - "'tnr_ef_1_ano',\n", - "'tnr_ef_2_ano',\n", - "'tnr_ef_3_ano',\n", - "'tnr_ef_4_ano',\n", - "'tnr_ef_5_ano',\n", - "'tnr_ef_6_ano',\n", - "'tnr_ef_7_ano',\n", - "'tnr_ef_8_ano',\n", - "'tnr_ef_9_ano',\n", - "'tnr_em',\n", - "'tnr_em_1_ano',\n", - "'tnr_em_2_ano',\n", - "'tnr_em_3_ano',\n", - "'tnr_em_4_ano',\n", - "'tnr_em_nao_seriado',\n", - "'dsu_ei',\n", - "'dsu_ei_creche',\n", - "'dsu_ei_pre_escola',\n", - "'dsu_ef',\n", - "'dsu_ef_anos_iniciais',\n", - "'dsu_ef_anos_finais',\n", - "'dsu_em',\n", - "'dsu_ep',\n", - "'dsu_eja',\n", - "'dsu_ee',\n", - "'afd_ei_grupo_1',\n", - "'afd_ei_grupo_2',\n", - "'afd_ei_grupo_3',\n", - "'afd_ei_grupo_4',\n", - "'afd_ei_grupo_5',\n", - "'afd_ef_grupo_1',\n", - "'afd_ef_grupo_2',\n", - "'afd_ef_grupo_3',\n", - "'afd_ef_grupo_4',\n", - "'afd_ef_grupo_5',\n", - "'afd_ef_anos_iniciais_grupo_1',\n", - "'afd_ef_anos_iniciais_grupo_2',\n", - "'afd_ef_anos_iniciais_grupo_3',\n", - "'afd_ef_anos_iniciais_grupo_4',\n", - "'afd_ef_anos_iniciais_grupo_5',\n", - "'afd_ef_anos_finais_grupo_1',\n", - "'afd_ef_anos_finais_grupo_2',\n", - "'afd_ef_anos_finais_grupo_3',\n", - "'afd_ef_anos_finais_grupo_4',\n", - "'afd_ef_anos_finais_grupo_5',\n", - "'afd_em_grupo_1',\n", - "'afd_em_grupo_2',\n", - "'afd_em_grupo_3',\n", - "'afd_em_grupo_4',\n", - "'afd_em_grupo_5',\n", - 
"'afd_eja_fundamental_grupo_1',\n", - "'afd_eja_fundamental_grupo_2',\n", - "'afd_eja_fundamental_grupo_3',\n", - "'afd_eja_fundamental_grupo_4',\n", - "'afd_eja_fundamental_grupo_5',\n", - "'afd_eja_medio_grupo_1',\n", - "'afd_eja_medio_grupo_2',\n", - "'afd_eja_medio_grupo_3',\n", - "'afd_eja_medio_grupo_4',\n", - "'afd_eja_medio_grupo_5',\n", - "'ird_baixa_regularidade',\n", - "'ird_media_baixa',\n", - "'ird_media_alta',\n", - "'ird_alta',\n", - "'ied_ef_nivel_1',\n", - "'ied_ef_nivel_2',\n", - "'ied_ef_nivel_3',\n", - "'ied_ef_nivel_4',\n", - "'ied_ef_nivel_5',\n", - "'ied_ef_nivel_6',\n", - "'ied_ef_anos_iniciais_nivel_1',\n", - "'ied_ef_anos_iniciais_nivel_2',\n", - "'ied_ef_anos_iniciais_nivel_3',\n", - "'ied_ef_anos_iniciais_nivel_4',\n", - "'ied_ef_anos_iniciais_nivel_5',\n", - "'ied_ef_anos_iniciais_nivel_6',\n", - "'ied_ef_anos_finais_nivel_1',\n", - "'ied_ef_anos_finais_nivel_2',\n", - "'ied_ef_anos_finais_nivel_3',\n", - "'ied_ef_anos_finais_nivel_4',\n", - "'ied_ef_anos_finais_nivel_5',\n", - "'ied_ef_anos_finais_nivel_6',\n", - "'ied_em_nivel_1',\n", - "'ied_em_nivel_2',\n", - "'ied_em_nivel_3',\n", - "'ied_em_nivel_4',\n", - "'ied_em_nivel_5',\n", - "'ied_em_nivel_6',\n", - "'icg_nivel_1',\n", - "'icg_nivel_2',\n", - "'icg_nivel_3',\n", - "'icg_nivel_4',\n", - "'icg_nivel_5',\n", - "'icg_nivel_6']" - ], - "metadata": { - "id": "JJrhDnn8UaJ-" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#2021\n", - "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 
'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('AFD_2021_MUNICIPIOS/AFD_MUNICIPIOS_2021.xlsx') as f:\n", - " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd.drop(range(67126, 67132), inplace=True)\n", - " afd = afd.replace('--', '')\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd[index] = afd[index].astype(str)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 
'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('ATU_2021_MUNICIPIOS/ATU_MUNICIPIOS_2021.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu.drop(range(66464, 66467), inplace=True)\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('DSU_2021_MUNICIPIOS/DSU_MUNICIPIOS_2021.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu.drop(range(67294, 67296), inplace=True)\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = 
{'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_01':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('HAD_2021_MUNICIPIOS/HAD_MUNICIPIOS_2021.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had.drop(range(65548, 65551), inplace=True)\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", - " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", - "\n", - "with ZipFile('/content/ICG_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('ICG_2021_MUNICIPIOS/ICG_MUNICIPIOS_2021.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", - " icg.drop(range(67302, 67307), inplace=True)\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " 
icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('IED_2021_MUNICIPIOS/IED_MUNICIPIOS_2021.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied.drop(range(66286, 66291), inplace=True)\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 
'EDU_BAS_CAT_2':'ird_media_baixa', \n", - " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", - "\n", - "with ZipFile('/content/IRD_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('IRD_2021_MUNICIPIOS/IRD_MUNICIPIOS_2021.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", - " ird.drop(range(66876, 66881), inplace=True)\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2021_MUNICIPIOS.zip') as z:\n", - " with z.open('TDI_2021_MUNICIPIOS/TDI_MUNICIPIOS_2021.xlsx') as f:\n", - " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tdi.drop(range(65538, 65540), inplace=True)\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - " '4_CAT_FUN':'tnr_ef', 
'4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - " '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - " '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - " '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - " '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - " with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - " tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tnr.drop(range(65574, 65577), inplace=True)\n", - " tnr = tnr.replace('--', '')\n", - " tnr.rename(columns=rename_tnr, inplace=True)\n", - " tnr[index] = tnr[index].astype(str)\n", - " tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - " tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - " '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - " '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - " '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - " '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - " '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - " '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', 
'1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - " '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - " '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - " '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - " '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - " '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - " '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - " '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - " '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - " '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - " '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - " '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - " '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - " with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - " tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tx.drop(range(65574, 65576), inplace=True)\n", - " tx = tx.replace('--', 
'')\n", - " tx.rename(columns=rename_tx, inplace=True)\n", - " tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", - " tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", - "df = df[ordem]\n", - "df.to_csv('/content/municipio.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "zDqi1IAvqvp2" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#2022\n", - "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 
'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('AFD_2022_MUNICIPIOS/AFD_MUNICIPIOS_2022.xlsx') as f:\n", - " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd.drop(range(67118, 67123), inplace=True)\n", - " afd = afd.replace('--', '')\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 'FUN_08_CAT_0':'atu_ef_8_ano', 
'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('ATU_2022_MUNICIPIOS/ATU_MUNICIPIOS_2022.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu.drop(range(66515, 66518), inplace=True)\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('DSU_2022_MUNICIPIOS/DSU_MUNICIPIOS_2022.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu.drop(range(67301, 67303), inplace=True)\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 
'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_01':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('HAD_2022_MUNICIPIOS/HAD_MUNICIPIOS_2022.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had.drop(range(66509, 66512), inplace=True)\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", - " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", - "\n", - "with ZipFile('/content/ICG_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('ICG_2022_MUNICIPIOS/ICG_MUNICIPIOS_2022.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", - " icg.drop(range(67303, 67308), inplace=True)\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 
'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('IED_2022_MUNICIPIOS/IED_MUNICIPIOS_2022.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied.drop(range(66266, 66271), inplace=True)\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", - " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", - "\n", - "with ZipFile('/content/IRD_2022_MUNICIPIOS.zip') as z:\n", - " with 
z.open('IRD_2022_MUNICIPIOS/IRD_MUNICIPIOS_2022.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", - " ird.drop(range(66845, 66850), inplace=True)\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2022_MUNICIPIOS.zip') as z:\n", - " with z.open('TDI_2022_MUNICIPIOS/TDI_MUNICIPIOS_2022.xlsx') as f:\n", - " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tdi.drop(range(65548, 65550), inplace=True)\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "# rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', 
'4_CAT_FUN_04':'tnr_ef_4_ano', \n", - "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tnr.drop(range(65574, 65577), inplace=True)\n", - "# tnr = tnr.replace('--', '')\n", - "# tnr.rename(columns=rename_tnr, inplace=True)\n", - "# tnr[index] = tnr[index].astype(str)\n", - "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - "# '2_CAT_FUN':'taxa_reprovacao_ef', 
'2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tx.drop(range(65574, 65576), inplace=True)\n", - "# tx = tx.replace('--', '')\n", - "# tx.rename(columns=rename_tx, inplace=True)\n", - "# tx['localizacao'] = 
tx['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", - "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", - "df_list = df.columns\n", - "df2 = list(set(ordem) - set(df_list))\n", - "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", - "#ORDENA TODAS AS VARIÁVEIS \n", - "df = df[ordem]\n", - "df.to_csv('/content/municipio_2022.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "b0qftgG48U4F" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Escola" - ], - "metadata": { - "id": "E-U-Wg62dkVH" - } - }, - { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_ESCOLAS.zip\n", - "!wget 
--no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_ESCOLAS.zip\n", - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_ESCOLAS.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "FdOawGS8rhvd", - "outputId": "40e2261d-ddda-431b-e77b-d889172d8e88" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2023-02-11 18:24:21-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 59511457 (57M) [application/zip]\n", - "Saving to: ‘AFD_2022_ESCOLAS.zip.1’\n", - "\n", - "AFD_2022_ESCOLAS.zi 100%[===================>] 56.75M 293KB/s in 3m 10s \n", - "\n", - "2023-02-11 18:27:34 (306 KB/s) - ‘AFD_2022_ESCOLAS.zip.1’ saved [59511457/59511457]\n", - "\n", - "--2023-02-11 18:27:34-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 18095429 (17M) [application/zip]\n", - "Saving to: ‘ICG_2022_ESCOLAS.zip’\n", - "\n", - "ICG_2022_ESCOLAS.zi 100%[===================>] 17.26M 283KB/s in 66s \n", - "\n", - "2023-02-11 18:28:41 (269 KB/s) - ‘ICG_2022_ESCOLAS.zip’ saved [18095429/18095429]\n", - "\n", - "--2023-02-11 18:28:41-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 39032819 (37M) [application/zip]\n", - "Saving to: ‘IED_2022_ESCOLAS.zip’\n", - "\n", - "IED_2022_ESCOLAS.zi 100%[===================>] 37.22M 357KB/s in 2m 7s \n", - "\n", - "2023-02-11 18:30:50 (301 KB/s) - ‘IED_2022_ESCOLAS.zip’ saved [39032819/39032819]\n", - "\n", - "--2023-02-11 18:30:50-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 51091323 (49M) [application/zip]\n", - "Saving to: ‘ATU_2022_ESCOLAS.zip’\n", - "\n", - "ATU_2022_ESCOLAS.zi 100%[===================>] 48.72M 364KB/s in 3m 35s \n", - "\n", - "2023-02-11 18:34:27 (232 KB/s) - ‘ATU_2022_ESCOLAS.zip’ saved [51091323/51091323]\n", - "\n", - "--2023-02-11 18:34:27-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 41905778 (40M) [application/zip]\n", - "Saving to: ‘HAD_2022_ESCOLAS.zip’\n", - "\n", - "HAD_2022_ESCOLAS.zi 100%[===================>] 39.96M 369KB/s in 2m 24s \n", - "\n", - "2023-02-11 18:36:54 (283 KB/s) - ‘HAD_2022_ESCOLAS.zip’ saved [41905778/41905778]\n", - "\n", - "--2023-02-11 18:36:54-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 32236510 (31M) [application/zip]\n", - "Saving to: ‘DSU_2022_ESCOLAS.zip’\n", - "\n", - "DSU_2022_ESCOLAS.zi 100%[===================>] 30.74M 252KB/s in 1m 57s \n", - "\n", - "2023-02-11 18:38:52 (270 KB/s) - ‘DSU_2022_ESCOLAS.zip’ saved [32236510/32236510]\n", - "\n", - "--2023-02-11 18:38:52-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 20263711 (19M) [application/zip]\n", - "Saving to: ‘IRD_2022_ESCOLAS.zip’\n", - "\n", - "IRD_2022_ESCOLAS.zi 100%[===================>] 19.32M 362KB/s in 59s \n", - "\n", - "2023-02-11 18:39:54 (333 KB/s) - ‘IRD_2022_ESCOLAS.zip’ saved [20263711/20263711]\n", - "\n", - "--2023-02-11 18:39:54-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_ESCOLAS.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 35400577 (34M) [application/zip]\n", - "Saving to: ‘TDI_2022_ESCOLAS.zip’\n", - "\n", - "TDI_2022_ESCOLAS.zi 100%[===================>] 33.76M 334KB/s in 1m 59s \n", - "\n", - "2023-02-11 18:41:54 (291 KB/s) - ‘TDI_2022_ESCOLAS.zip’ saved [35400577/35400577]\n", - "\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "ordem = ['id_municipio', 'id_escola', 'localizacao', 'rede', \n", - " 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", - " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", - " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", - " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", - " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", - " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", - " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 
'had_em_3_ano', 'had_em_4_ano',\n", - " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", - " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", - " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", - " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", - " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", - " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", - " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", - " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", - " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", - " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", - " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano',\n", - " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano',\n", - " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", - " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", - " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', \n", - " 
'tnr_ef_anos_finais', 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', \n", - " 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano',\n", - " 'tnr_em_3_ano', 'tnr_em_4_ano', 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', \n", - " 'dsu_ef', 'dsu_ef_anos_iniciais', 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee',\n", - " 'afd_ei_grupo_1', 'afd_ei_grupo_2', 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1',\n", - " 'afd_ef_grupo_2', 'afd_ef_grupo_3', 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1',\n", - " 'afd_ef_anos_iniciais_grupo_2', 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4',\n", - " 'afd_ef_anos_iniciais_grupo_5', 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2',\n", - " 'afd_ef_anos_finais_grupo_3', 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', \n", - " 'afd_em_grupo_2', 'afd_em_grupo_3', 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', \n", - " 'afd_eja_fundamental_grupo_2', 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4',\n", - " 'afd_eja_fundamental_grupo_5' ,'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3',\n", - " 'afd_eja_medio_grupo_4', 'afd_eja_medio_grupo_5' ,'ird_media_regularidade_docente', 'ied_ef_nivel_1', \n", - " 'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', \n", - " 'ied_ef_nivel_5', 'ied_ef_nivel_6', 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2',\n", - " 'ied_ef_anos_iniciais_nivel_3', 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5',\n", - " 'ied_ef_anos_iniciais_nivel_6', 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2',\n", - " 'ied_ef_anos_finais_nivel_3', 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5',\n", - " 'ied_ef_anos_finais_nivel_6', 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 
'ied_em_nivel_4',\n", - " 'ied_em_nivel_5', 'ied_em_nivel_6', 'icg_nivel_complexidade_gestao_escola']" - ], - "metadata": { - "id": "pixctEMyvoaH" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#2022\n", - "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", - " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", - " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", - " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", - " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", - " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", - " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", - " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", - " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", - " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", - " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", - "\n", - "with ZipFile('/content/AFD_2022_ESCOLAS.zip') as z:\n", - " with z.open('AFD_2022_ESCOLAS/AFD_ESCOLAS_2022.xlsx') as f:\n", - " afd 
= pd.read_excel(f, skiprows=10, dtype=str)\n", - " afd.drop(range(175523, 175528), inplace=True)\n", - " afd = afd.replace('--', '')\n", - " afd.rename(columns=rename_afd, inplace=True)\n", - " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", - " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", - " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", - " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", - " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", - " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", - " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", - " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/ATU_2022_ESCOLAS.zip') as z:\n", - " with z.open('ATU_2022_ESCOLAS/ATU_ESCOLAS_2022.xlsx') as f:\n", - " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", - " atu.drop(range(174319, 174322), inplace=True)\n", - " atu = atu.replace('--', '')\n", - " atu.rename(columns=rename_atu, inplace=True)\n", - " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", - " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_dsu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 
'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", - " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", - " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", - "\n", - "with ZipFile('/content/DSU_2022_ESCOLAS.zip') as z:\n", - " with z.open('DSU_2022_ESCOLAS/DSU_ESCOLAS_2022.xlsx') as f:\n", - " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", - " dsu.drop(range(178111, 178113), inplace=True)\n", - " dsu = dsu.replace('--', '')\n", - " dsu.rename(columns=rename_dsu, inplace=True)\n", - " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", - " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_had = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", - " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", - " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", - " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", - " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", - " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/HAD_2022_ESCOLAS.zip') as z:\n", - " with z.open('HAD_2022_ESCOLAS/HAD_ESCOLAS_2022.xlsx') as f:\n", - " had = pd.read_excel(f, skiprows=8, dtype=str)\n", - " had.drop(range(173795, 173798), inplace=True)\n", - " had = had.replace('--', '')\n", - " had.rename(columns=rename_had, inplace=True)\n", - " had['localizacao'] = 
had['localizacao'].str.lower().replace('pública', 'publica')\n", - " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'COMPLEX':'icg_nivel_complexidade_gestao_escola'}\n", - "\n", - "with ZipFile('/content/ICG_2022_ESCOLAS.zip') as z:\n", - " with z.open('ICG_2022_ESCOLAS/ICG_ESCOLAS_2022.xlsx') as f:\n", - " icg = pd.read_excel(f, skiprows=10, dtype=str)\n", - " icg.drop(range(178346, 178351), inplace=True)\n", - " icg = icg.replace('--', '')\n", - " icg.rename(columns=rename_icg, inplace=True)\n", - " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", - " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", - " icg['icg_nivel_complexidade_gestao_escola'] = icg['icg_nivel_complexidade_gestao_escola'].str.lower()\n", - " icg['icg_nivel_complexidade_gestao_escola']= icg['icg_nivel_complexidade_gestao_escola'].apply(lambda x: str(x).replace('Nível', 'nivel'))\n", - "\n", - "rename_ied = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", - " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", - " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", - " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", - " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", - " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", - " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", - " 
'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", - " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", - " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", - "\n", - "with ZipFile('/content/IED_2022_ESCOLAS.zip') as z:\n", - " with z.open('IED_2022_ESCOLAS/IED_ESCOLAS_2022.xlsx') as f:\n", - " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ied.drop(range(130907, 130912), inplace=True)\n", - " ied = ied.replace('--', '')\n", - " ied.rename(columns=rename_ied, inplace=True)\n", - " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", - " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'EDU_BAS_CAT_0':'ird_media_regularidade_docente'}\n", - "\n", - "with ZipFile('/content/IRD_2022_ESCOLAS.zip') as z:\n", - " with z.open('IRD_2022_ESCOLAS/IRD_ESCOLAS_2022.xlsx') as f:\n", - " ird = pd.read_excel(f, skiprows=10, dtype=str)\n", - " ird.drop(range(165721, 165726), inplace=True)\n", - " ird = ird.replace('--', '')\n", - " ird.rename(columns=rename_ird, inplace=True)\n", - " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", - " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", - " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", - " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", - " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", - " 
'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", - " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", - " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", - "\n", - "with ZipFile('/content/TDI_2022_ESCOLAS.zip') as z:\n", - " with z.open('TDI_2022_ESCOLAS/TDI_ESCOLAS_2022.xlsx') as f:\n", - " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", - " tdi.drop(range(129174, 129176), inplace=True)\n", - " tdi = tdi.replace('--', '')\n", - " tdi.rename(columns=rename_tdi, inplace=True)\n", - " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", - " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", - "\n", - "# rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", - "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", - "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tnr.drop(range(65574, 65577), inplace=True)\n", - "# tnr = tnr.replace('--', '')\n", - "# tnr.rename(columns=rename_tnr, inplace=True)\n", - "# tnr[index] = tnr[index].astype(str)\n", - "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 
'publica')\n", - "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "\n", - "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', 
'3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", - "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", - "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", - "# tx.drop(range(65574, 65576), inplace=True)\n", - "# tx = tx.replace('--', '')\n", - "# tx.rename(columns=rename_tx, inplace=True)\n", - "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", - "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " \n", - "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede', 'id_escola',\t'NO_ENTIDADE'] \n", - "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", - "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", - "# df = pd.merge(df, tx, 
how='outer', left_on=index, right_on=index)\n", - "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", - "df_list = df.columns\n", - "df2 = list(set(ordem) - set(df_list))\n", - "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", - "#ORDENA TODAS AS VARIÁVEIS \n", - "df = df[ordem]\n", - "df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "oBnitg2gs1C6" - }, - "execution_count": null, - "outputs": [] + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "pd.set_option(\"display.max_columns\", None)\n", + "import basedosdados as bd\n", + "import zipfile\n", + "from zipfile import ZipFile" + ], + "metadata": { + "id": "dSbj4uDkOu0n" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Brasil" + ], + "metadata": { + "id": "H2KTESTddnNE" + } + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_BRASIL_REGIOES_UF.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate 
https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_BRASIL_REGIOES_UFS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_BRASIL_REGIOES_UFS.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "8UuaE8D1ezML", + "outputId": "640eaa3c-c4b6-46af-a234-61e65d6f1b1e" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "query = ''' \n", - "SELECT * FROM `basedosdados-dev.br_inep_indicadores_educacionais.escola` WHERE ano = 2021\n", - "'''\n", - "\n", - "df_escola = bd.read_sql(query, billing_project_id='input-bd')" - ], - "metadata": { - "id": "Ue9hLyUZOxni" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-02-11 12:42:18-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_BRASIL_REGIOES_UF.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 279399 (273K) [application/zip]\n", + "Saving to: ‘AFD_2022_BRASIL_REGIOES_UF.zip’\n", + "\n", + "AFD_2022_BRASIL_REG 100%[===================>] 272.85K 347KB/s in 0.8s \n", + "\n", + "2023-02-11 12:42:20 (347 KB/s) - ‘AFD_2022_BRASIL_REGIOES_UF.zip’ saved [279399/279399]\n", + "\n", + "--2023-02-11 12:42:20-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 
200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 91142 (89K) [application/zip]\n", + "Saving to: ‘ICG_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "ICG_2022_BRASIL_REG 100%[===================>] 89.01K 189KB/s in 0.5s \n", + "\n", + "2023-02-11 12:42:21 (189 KB/s) - ‘ICG_2022_BRASIL_REGIOES_UFS.zip’ saved [91142/91142]\n", + "\n", + "--2023-02-11 12:42:21-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 217268 (212K) [application/zip]\n", + "Saving to: ‘IED_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "IED_2022_BRASIL_REG 100%[===================>] 212.18K 193KB/s in 1.1s \n", + "\n", + "2023-02-11 12:42:23 (193 KB/s) - ‘IED_2022_BRASIL_REGIOES_UFS.zip’ saved [217268/217268]\n", + "\n", + "--2023-02-11 12:42:23-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 198113 (193K) [application/zip]\n", + "Saving to: ‘ATU_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "ATU_2022_BRASIL_REG 100%[===================>] 193.47K 308KB/s in 0.6s \n", + "\n", + "2023-02-11 12:42:24 (308 KB/s) - ‘ATU_2022_BRASIL_REGIOES_UFS.zip’ saved [198113/198113]\n", + "\n", + "--2023-02-11 12:42:25-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 84853 (83K) [application/zip]\n", + "Saving to: ‘HAD_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "HAD_2022_BRASIL_REG 100%[===================>] 82.86K 233KB/s in 0.4s \n", + "\n", + "2023-02-11 12:42:26 (233 KB/s) - ‘HAD_2022_BRASIL_REGIOES_UFS.zip’ saved [84853/84853]\n", + "\n", + "--2023-02-11 12:42:26-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 118154 (115K) [application/zip]\n", + "Saving to: ‘DSU_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "DSU_2022_BRASIL_REG 100%[===================>] 115.38K 184KB/s in 0.6s \n", + "\n", + "2023-02-11 12:42:27 (184 KB/s) - ‘DSU_2022_BRASIL_REGIOES_UFS.zip’ saved [118154/118154]\n", + "\n", + "--2023-02-11 12:42:27-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 81096 (79K) [application/zip]\n", + "Saving to: ‘IRD_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "IRD_2022_BRASIL_REG 100%[===================>] 79.20K 252KB/s in 0.3s \n", + "\n", + "2023-02-11 12:42:28 (252 KB/s) - ‘IRD_2022_BRASIL_REGIOES_UFS.zip’ saved [81096/81096]\n", + "\n", + "--2023-02-11 12:42:28-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_BRASIL_REGIOES_UFS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 172134 (168K) [application/zip]\n", + "Saving to: ‘TDI_2022_BRASIL_REGIOES_UFS.zip’\n", + "\n", + "TDI_2022_BRASIL_REG 100%[===================>] 168.10K 267KB/s in 0.6s \n", + "\n", + "2023-02-11 12:42:30 (267 KB/s) - ‘TDI_2022_BRASIL_REGIOES_UFS.zip’ saved [172134/172134]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "ordem = ['localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", + " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", + " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", + " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", + " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", + " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", + " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", + " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", + " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", + " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", + " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 
'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", + " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", + " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", + " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", + " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", + " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", + " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", + " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", + " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", + " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", + " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", + " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", + " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", + " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", + " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", + " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", + " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", + " 
'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", + " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' ,'afd_ef_anos_iniciais_grupo_5',\n", + " 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", + " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", + " 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", + " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", + " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", + " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", + " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", + " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", + " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", + " 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", + " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", + " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", + " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" + ], + "metadata": { + "id": "ZxwfRbG6iOhJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "rename_afd = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 
'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", + " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd = afd.replace('--', '')\n", + " afd = afd[afd.UNIDGEO.isin(['Brasil'])]\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 
'FUN_CAT_0':'atu_ef',\n", + " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu = atu[atu.UNIDGEO.isin(['Brasil'])]\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, inplace=True)\n", + " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu = dsu[dsu.UNIDGEO.isin(['Brasil'])]\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = 
dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had = had[had.UNIDGEO.isin(['Brasil'])]\n", + " had = had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", + " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", + "\n", + "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", + 
" icg = icg[icg.UNIDGEO.isin(['Brasil'])]\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied = ied[ied.UNIDGEO.isin(['Brasil'])]\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ird = {'NU_ANO_CENSO':'ano', 
'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", + " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", + "\n", + "with ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", + " ird = ird[ird.UNIDGEO.isin(['Brasil'])]\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi = tdi[tdi.UNIDGEO.isin(['Brasil'])]\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "# rename_tnr = {'NU_ANO_CENSO':'ano', 
'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tnr = tnr[tnr.UNIDGEO.isin(['Brasil'])]\n", + "# tnr = tnr.replace('--', '')\n", + "# tnr.rename(columns=rename_tnr, inplace=True)\n", + "# tnr[index] = tnr[index].astype(str)\n", + "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + "# '1_CAT_MED':'taxa_aprovacao_em', 
'1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + "# with 
z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tx = tx[tx.UNIDGEO.isin(['Brasil'])]\n", + "# tx = tx.replace('--', '')\n", + "# tx.rename(columns=rename_tx, inplace=True)\n", + "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'UNIDGEO',\t'localizacao',\t'rede'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", + "\n", + "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", + "df_list = df.columns\n", + "df2 = list(set(ordem) - set(df_list))\n", + "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", + "#ORDENA TODAS AS VARIÁVEIS \n", + "df = df[ordem]\n", + "\n", + "df.to_csv('/content/brasil.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "P_lweYEYiZWx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "query = ''' \n", + "SELECT * FROM `basedosdados-dev.br_inep_indicadores_educacionais.brasil` WHERE ano = 2021\n", + "'''\n", + "\n", + "df_br = bd.read_sql(query, billing_project_id='input-bd')\n", + "\n", + "index = ['localizacao', 'rede']\n", + "drop = ['taxa_aprovacao_ef', 
'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano', 'taxa_aprovacao_em',\n", + " 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano', 'taxa_aprovacao_em_4_ano',\n", + " 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais', 'taxa_reprovacao_ef_anos_finais',\n", + " 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano', 'taxa_reprovacao_ef_4_ano',\n", + " 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano', 'taxa_reprovacao_ef_8_ano',\n", + " 'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano', 'taxa_reprovacao_em_2_ano',\n", + " 'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano', 'taxa_reprovacao_em_nao_seriado', 'taxa_abandono_ef',\n", + " 'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', 'taxa_abandono_ef_2_ano',\n", + " 'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', 'taxa_abandono_ef_6_ano',\n", + " 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano', 'taxa_abandono_ef_9_ano', 'taxa_abandono_em', 'taxa_abandono_em_1_ano',\n", + " 'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado',\n", + " 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano',\n", + " 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em', 'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano',\n", + " 'tnr_em_4_ano', 'tnr_em_nao_seriado']\n", + "df_br.drop(drop, axis=1, inplace=True)\n", + "df_br = pd.merge(df_br, tnr, how='left', left_on=index, 
right_on=index)\n", + "df_br = pd.merge(df_br, tx, how='left', left_on=index, right_on=index)\n", + "df_br = df_br[ordem]\n", + "df_br.to_csv('/content/brasil.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "ZF1iUQgZvEK7" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Região" + ], + "metadata": { + "id": "0yfI7W9wjCf9" + } + }, + { + "cell_type": "code", + "source": [ + "ordem = ['regiao', 'localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", + " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", + " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", + " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", + " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", + " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", + " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", + " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", + " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", + " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", + " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", + " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 
'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", + " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", + " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", + " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", + " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", + " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", + " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", + " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", + " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", + " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", + " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", + " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", + " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", + " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", + " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", + " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", + " 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", + " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' 
,'afd_ef_anos_iniciais_grupo_5',\n", + " 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", + " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", + " 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", + " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", + " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", + " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", + " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", + " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", + " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", + " 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", + " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", + " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", + " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" + ], + "metadata": { + "id": "UIzF2PxrxSX3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "regiao = ['Norte', 'Nordeste', 'Sudeste', 'Sul', 'Centro-Oeste']\n", + "rename_afd = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 
'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", + " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd = afd.replace('--', '')\n", + " afd = afd[afd.UNIDGEO.isin(regiao)]\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", + " 
'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu = atu[atu.UNIDGEO.isin(regiao)]\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, inplace=True)\n", + " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu = dsu[dsu.UNIDGEO.isin(regiao)]\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = 
dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had = had[had.UNIDGEO.isin(regiao)]\n", + " had = had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", + " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", + "\n", + "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " icg = 
pd.read_excel(f, skiprows=8, dtype=str)\n", + " icg = icg[icg.UNIDGEO.isin(regiao)]\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied = ied[ied.UNIDGEO.isin(regiao)]\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + 
"rename_ird = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", + " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", + "\n", + "with ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", + " ird = ird[ird.UNIDGEO.isin(regiao)]\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi = tdi[tdi.UNIDGEO.isin(regiao)]\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 
'publica')\n", + "\n", + "# rename_tnr = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tnr = tnr[tnr.UNIDGEO.isin(regiao)]\n", + "# tnr = tnr.replace('--', '')\n", + "# tnr.rename(columns=rename_tnr, inplace=True)\n", + "# tnr[index] = tnr[index].astype(str)\n", + "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "# rename_tx = {'NU_ANO_CENSO':'ano', 'UNIDGEO':'regiao', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + "# 
'1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + "# with 
z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tx = tx[tx.UNIDGEO.isin(regiao)]\n", + "# tx = tx.replace('--', '')\n", + "# tx.rename(columns=rename_tx, inplace=True)\n", + "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'regiao',\t'localizacao',\t'rede'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", + "\n", + "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", + "df_list = df.columns\n", + "df2 = list(set(ordem) - set(df_list))\n", + "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", + "#ORDENA TODAS AS VARIÁVEIS \n", + "df = df[ordem]\n", + "\n", + "df.to_csv('/content/regiao.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "RvcgO46WxnQs" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### UF" + ], + "metadata": { + "id": "hqhcoua0ru0n" + } + }, + { + "cell_type": "code", + "source": [ + "query = 'SELECT sigla, nome FROM basedosdados.br_bd_diretorios_brasil.uf'\n", + "sigla = bd.read_sql(query, billing_project_id='input-bd')\n", + "sigla.rename(columns={'sigla':'sigla_uf'}, inplace=True)\n", 
+ "nome_ufs = sigla['nome'].tolist()\n", + "ufs = {'Acre':'AC', 'Alagoas':'AL', 'Amazonas':'AM', 'Amapá':'AP', 'Bahia':'BA', 'Ceará':'CE', 'Distrito Federal':'DF', 'Espírito Santo':'ES', 'Goiás':'GO', \n", + " 'Maranhão':'MA', 'Minas Gerais':'MG', 'Mato Grosso do Sul':'MS', 'Mato Grosso':'MT', \n", + " 'Pará':'PA', 'Paraíba':'PB', 'Pernambuco':'PE', 'Piauí':'PI', 'Paraná':'PR', 'Rio de Janeiro':'RJ', 'Rio Grande do Norte':'RN', \n", + " 'Rondônia':'RO', 'Roraima':'RR', 'Rio Grande do Sul':'RS', 'Santa Catarina':'SC', 'Sergipe':'SE', 'São Paulo':'SP', 'Tocantins':'TO'}" + ], + "metadata": { + "id": "ovuiiT7AtIjJ", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "f5d2bde6-54d5-4d2f-9f3c-6891e74f6bf4" + }, + "execution_count": 4, + "outputs": [ { - "cell_type": "code", - "source": [ - "df_escola = pd.read_csv('/content/staging_br_inep_indicadores_educacionais_escola_ano=2021_escola.csv')\n", - "\n", - "index = ['id_municipio', 'id_escola', 'localizacao', 'rede']\n", - "drop = ['taxa_aprovacao_ef', 'taxa_aprovacao_ef_anos_iniciais',\n", - " 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", - " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano',\n", - " 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", - " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano',\n", - " 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano', 'taxa_aprovacao_em',\n", - " 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano',\n", - " 'taxa_aprovacao_em_3_ano', 'taxa_aprovacao_em_4_ano',\n", - " 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef',\n", - " 'taxa_reprovacao_ef_anos_iniciais', 'taxa_reprovacao_ef_anos_finais',\n", - " 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano',\n", - " 'taxa_reprovacao_ef_3_ano', 'taxa_reprovacao_ef_4_ano',\n", - " 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano',\n", - " 'taxa_reprovacao_ef_7_ano', 'taxa_reprovacao_ef_8_ano',\n", - " 'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em',\n", - " 
'taxa_reprovacao_em_1_ano', 'taxa_reprovacao_em_2_ano',\n", - " 'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano',\n", - " 'taxa_reprovacao_em_nao_seriado', 'taxa_abandono_ef',\n", - " 'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais',\n", - " 'taxa_abandono_ef_1_ano', 'taxa_abandono_ef_2_ano',\n", - " 'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano',\n", - " 'taxa_abandono_ef_5_ano', 'taxa_abandono_ef_6_ano',\n", - " 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano',\n", - " 'taxa_abandono_ef_9_ano', 'taxa_abandono_em', 'taxa_abandono_em_1_ano',\n", - " 'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano',\n", - " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado',\n", - " 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais',\n", - " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano',\n", - " 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano',\n", - " 'tnr_ef_9_ano', 'tnr_em', 'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano',\n", - " 'tnr_em_4_ano', 'tnr_em_nao_seriado']\n", - "df_escola.drop(drop, axis=1, inplace=True)\n", - "df_escola[index] = df_escola[index].astype(str)" - ], - "metadata": { - "id": "T3BnKespuGFu" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stderr", + "text": [ + "Downloading: 100%|██████████| 27/27 [00:00<00:00, 150.63rows/s]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "ordem = ['sigla_uf', 'localizacao', 'rede', 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", + " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", + " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", + " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", + " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 
'had_ef_1_ano',\n", + " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", + " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 'had_em_3_ano', 'had_em_4_ano',\n", + " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", + " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", + " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", + " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", + " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", + " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", + " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", + " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", + " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", + " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", + " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano', \n", + " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano', \n", + " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", + " 
'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", + " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais', \n", + " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano',\n", + " 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano', 'tnr_em_4_ano', \n", + " 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', 'dsu_ef', 'dsu_ef_anos_iniciais', \n", + " 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee', 'afd_ei_grupo_1', 'afd_ei_grupo_2', \n", + " 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1' ,'afd_ef_grupo_2', 'afd_ef_grupo_3',\n", + " 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1', 'afd_ef_anos_iniciais_grupo_2', \n", + " 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4' ,'afd_ef_anos_iniciais_grupo_5',\n", + " 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2', 'afd_ef_anos_finais_grupo_3', \n", + " 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', 'afd_em_grupo_2', 'afd_em_grupo_3', \n", + " 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', 'afd_eja_fundamental_grupo_2', \n", + " 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4', 'afd_eja_fundamental_grupo_5',\n", + " 'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3' ,'afd_eja_medio_grupo_4', \n", + " 'afd_eja_medio_grupo_5' ,'ird_baixa_regularidade' ,'ird_media_baixa', 'ird_media_alta', 'ird_alta', \n", + " 'ied_ef_nivel_1' ,'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', 'ied_ef_nivel_5', 'ied_ef_nivel_6',\n", + " 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2', 'ied_ef_anos_iniciais_nivel_3', \n", + " 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5' ,'ied_ef_anos_iniciais_nivel_6',\n", + " 
'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2', 'ied_ef_anos_finais_nivel_3', \n", + " 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5' ,'ied_ef_anos_finais_nivel_6',\n", + " 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 'ied_em_nivel_4', 'ied_em_nivel_5', 'ied_em_nivel_6', \n", + " 'icg_nivel_1', 'icg_nivel_2', 'icg_nivel_3', 'icg_nivel_4', 'icg_nivel_5', 'icg_nivel_6']" + ], + "metadata": { + "id": "RgpH_CmKzp2T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "rename_afd = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 
'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2022_BRASIL_REGIOES_UF.zip') as z:\n", + " with z.open('AFD_2022_BRASIL_REGIOES_UF/AFD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd = afd.replace('--', '')\n", + " afd = afd[afd.UNIDGEO.isin(nome_ufs)] \n", + " afd['sigla_uf'] = afd['UNIDGEO'].map(ufs)\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", + " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ATU_2022_BRASIL_REGIOES_UFS/ATU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu = atu[atu.UNIDGEO.isin(nome_ufs)]\n", + " atu['sigla_uf'] = atu['UNIDGEO'].map(ufs)\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, inplace=True)\n", + " atu['localizacao'] = 
atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('DSU_2022_BRASIL_REGIOES_UFS/DSU_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu = dsu[dsu.UNIDGEO.isin(nome_ufs)]\n", + " dsu['sigla_uf'] = dsu['UNIDGEO'].map(ufs)\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/HAD_2022_BRASIL_REGIOES_UFS.zip') as 
z:\n", + " with z.open('HAD_2022_BRASIL_REGIOES_UFS/HAD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had = had[had.UNIDGEO.isin(nome_ufs)]\n", + " had['sigla_uf'] = had['UNIDGEO'].map(ufs)\n", + " had = had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", + " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", + "\n", + "with ZipFile('/content/ICG_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('ICG_2022_BRASIL_REGIOES_UFS/ICG_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", + " icg = icg[icg.UNIDGEO.isin(nome_ufs)]\n", + " icg['sigla_uf'] = icg['UNIDGEO'].map(ufs)\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 
'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IED_2022_BRASIL_REGIOES_UFS/IED_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied = ied[ied.UNIDGEO.isin(nome_ufs)]\n", + " ied['sigla_uf'] = ied['UNIDGEO'].map(ufs)\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ird = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", + " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", + "\n", + "with ZipFile('/content/IRD_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('IRD_2022_BRASIL_REGIOES_UFS/IRD_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", + " ird = ird[ird.UNIDGEO.isin(nome_ufs)]\n", + " ird['sigla_uf'] = ird['UNIDGEO'].map(ufs)\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 
'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2022_BRASIL_REGIOES_UFS.zip') as z:\n", + " with z.open('TDI_2022_BRASIL_REGIOES_UFS/TDI_BRASIL_REGIOES_UFS_2022.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi = tdi[tdi.UNIDGEO.isin(nome_ufs)]\n", + " tdi['sigla_uf'] = tdi['UNIDGEO'].map(ufs)\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "# rename_tnr = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + "# with 
z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tnr = tnr[tnr.UNIDGEO.isin(nome_ufs)]\n", + "# tnr['sigla_uf'] = tnr['UNIDGEO'].map(ufs)\n", + "# tnr = tnr.replace('--', '')\n", + "# tnr.rename(columns=rename_tnr, inplace=True)\n", + "# tnr[index] = tnr[index].astype(str)\n", + "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "# rename_tx = {'NU_ANO_CENSO':'ano', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', 
'2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tx = tx[tx.UNIDGEO.isin(nome_ufs)]\n", + "# tx['sigla_uf'] = tx['UNIDGEO'].map(ufs)\n", + "# tx = tx.replace('--', '')\n", + "# tx.rename(columns=rename_tx, inplace=True)\n", + "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'UNIDGEO',\t'sigla_uf', 'localizacao',\t'rede'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, 
icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", + "\n", + "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", + "df_list = df.columns\n", + "df2 = list(set(ordem) - set(df_list))\n", + "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", + "#ORDENA TODAS AS VARIÁVEIS \n", + "df = df[ordem]\n", + "\n", + "df.to_csv('/content/uf.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "oBKWb3F6z_7E" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Município" + ], + "metadata": { + "id": "GuTAdm0WqxK3" + } + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/AFD_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/ICG_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/IED_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/ATU_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/HAD_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/DSU_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate 
https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/IRD_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/TDI_2021_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_municipios_2021.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_municipios_2021.zip" + ], + "metadata": { + "id": "A0OiYmdYrBHb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_MUNICIPIOS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_MUNICIPIOS.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "k8-Pm8Vz7OaA", + "outputId": "79547f49-52e5-42e7-b736-817cda78e1ba" + }, + 
"execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_escolas_2021.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVqdQPehVbal", - "outputId": "fd8bdc4c-6b6b-476d-f26c-f79e464ed32e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2022-09-14 09:29:58-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_escolas_2021.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 69251035 (66M) [application/zip]\n", - "Saving to: ‘tx_rend_escolas_2021.zip’\n", - "\n", - "tx_rend_escolas_202 100%[===================>] 66.04M 298KB/s in 4m 22s \n", - "\n", - "2022-09-14 09:34:22 (258 KB/s) - ‘tx_rend_escolas_2021.zip’ saved [69251035/69251035]\n", - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-02-11 14:43:00-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 23472229 (22M) [application/zip]\n", + "Saving to: ‘AFD_2022_MUNICIPIOS.zip’\n", + "\n", + "AFD_2022_MUNICIPIOS 100%[===================>] 22.38M 690KB/s in 35s \n", + "\n", + "2023-02-11 14:43:36 (660 KB/s) - ‘AFD_2022_MUNICIPIOS.zip’ saved [23472229/23472229]\n", + "\n", + "--2023-02-11 14:43:36-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6890745 (6.6M) [application/zip]\n", + "Saving to: ‘ICG_2022_MUNICIPIOS.zip’\n", + "\n", + "ICG_2022_MUNICIPIOS 100%[===================>] 6.57M 711KB/s in 10s \n", + "\n", + "2023-02-11 14:43:47 (655 KB/s) - ‘ICG_2022_MUNICIPIOS.zip’ saved [6890745/6890745]\n", + "\n", + "--2023-02-11 14:43:47-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 18534006 (18M) [application/zip]\n", + "Saving to: ‘IED_2022_MUNICIPIOS.zip’\n", + "\n", + "IED_2022_MUNICIPIOS 100%[===================>] 17.67M 705KB/s in 27s \n", + "\n", + "2023-02-11 14:44:15 (673 KB/s) - ‘IED_2022_MUNICIPIOS.zip’ saved [18534006/18534006]\n", + "\n", + "--2023-02-11 14:44:15-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 18120878 (17M) [application/zip]\n", + "Saving to: ‘ATU_2022_MUNICIPIOS.zip’\n", + "\n", + "ATU_2022_MUNICIPIOS 100%[===================>] 17.28M 689KB/s in 26s \n", + "\n", + "2023-02-11 14:44:42 (669 KB/s) - ‘ATU_2022_MUNICIPIOS.zip’ saved [18120878/18120878]\n", + "\n", + "--2023-02-11 14:44:43-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 14552148 (14M) [application/zip]\n", + "Saving to: ‘HAD_2022_MUNICIPIOS.zip’\n", + "\n", + "HAD_2022_MUNICIPIOS 100%[===================>] 13.88M 677KB/s in 22s \n", + "\n", + "2023-02-11 14:45:05 (650 KB/s) - ‘HAD_2022_MUNICIPIOS.zip’ saved [14552148/14552148]\n", + "\n", + "--2023-02-11 14:45:05-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 11135907 (11M) [application/zip]\n", + "Saving to: ‘DSU_2022_MUNICIPIOS.zip’\n", + "\n", + "DSU_2022_MUNICIPIOS 100%[===================>] 10.62M 719KB/s in 16s \n", + "\n", + "2023-02-11 14:45:23 (674 KB/s) - ‘DSU_2022_MUNICIPIOS.zip’ saved [11135907/11135907]\n", + "\n", + "--2023-02-11 14:45:23-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6044184 (5.8M) [application/zip]\n", + "Saving to: ‘IRD_2022_MUNICIPIOS.zip’\n", + "\n", + "IRD_2022_MUNICIPIOS 100%[===================>] 5.76M 690KB/s in 9.3s \n", + "\n", + "2023-02-11 14:45:33 (635 KB/s) - ‘IRD_2022_MUNICIPIOS.zip’ saved [6044184/6044184]\n", + "\n", + "--2023-02-11 14:45:33-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_MUNICIPIOS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 14627876 (14M) [application/zip]\n", + "Saving to: ‘TDI_2022_MUNICIPIOS.zip’\n", + "\n", + "TDI_2022_MUNICIPIOS 100%[===================>] 13.95M 697KB/s in 22s \n", + "\n", + "2023-02-11 14:45:56 (654 KB/s) - ‘TDI_2022_MUNICIPIOS.zip’ saved [14627876/14627876]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "ordem = ['id_municipio',\n", + "'localizacao',\n", + "'rede',\n", + "'atu_ei',\n", + "'atu_ei_creche',\n", + "'atu_ei_pre_escola',\n", + "'atu_ef',\n", + "'atu_ef_anos_iniciais',\n", + "'atu_ef_anos_finais',\n", + "'atu_ef_1_ano',\n", + "'atu_ef_2_ano',\n", + "'atu_ef_3_ano',\n", + "'atu_ef_4_ano',\n", + "'atu_ef_5_ano',\n", + "'atu_ef_6_ano',\n", + "'atu_ef_7_ano',\n", + "'atu_ef_8_ano',\n", + "'atu_ef_9_ano',\n", + "'atu_ef_turmas_unif_multi_fluxo',\n", + "'atu_em',\n", + "'atu_em_1_ano',\n", + "'atu_em_2_ano',\n", + "'atu_em_3_ano',\n", + "'atu_em_4_ano',\n", + "'atu_em_nao_seriado',\n", + "'had_ei',\n", + "'had_ei_creche',\n", + "'had_ei_pre_escola',\n", + "'had_ef',\n", + "'had_ef_anos_iniciais',\n", + "'had_ef_anos_finais',\n", + "'had_ef_1_ano',\n", + "'had_ef_2_ano',\n", + "'had_ef_3_ano',\n", + "'had_ef_4_ano',\n", + "'had_ef_5_ano',\n", + "'had_ef_6_ano',\n", + "'had_ef_7_ano',\n", + "'had_ef_8_ano',\n", + "'had_ef_9_ano',\n", + "'had_em',\n", + "'had_em_1_ano',\n", + "'had_em_2_ano',\n", + "'had_em_3_ano',\n", + "'had_em_4_ano',\n", + "'had_em_nao_seriado',\n", + "'tdi_ef',\n", + "'tdi_ef_anos_iniciais',\n", + "'tdi_ef_anos_finais',\n", + "'tdi_ef_1_ano',\n", + "'tdi_ef_2_ano',\n", + "'tdi_ef_3_ano',\n", + "'tdi_ef_4_ano',\n", + "'tdi_ef_5_ano',\n", + "'tdi_ef_6_ano',\n", + "'tdi_ef_7_ano',\n", + "'tdi_ef_8_ano',\n", + "'tdi_ef_9_ano',\n", + "'tdi_em',\n", + "'tdi_em_1_ano',\n", + "'tdi_em_2_ano',\n", + "'tdi_em_3_ano',\n", + "'tdi_em_4_ano',\n", + "'taxa_aprovacao_ef',\n", + "'taxa_aprovacao_ef_anos_iniciais',\n", + "'taxa_aprovacao_ef_anos_finais',\n", + "'taxa_aprovacao_ef_1_ano',\n", + 
"'taxa_aprovacao_ef_2_ano',\n", + "'taxa_aprovacao_ef_3_ano',\n", + "'taxa_aprovacao_ef_4_ano',\n", + "'taxa_aprovacao_ef_5_ano',\n", + "'taxa_aprovacao_ef_6_ano',\n", + "'taxa_aprovacao_ef_7_ano',\n", + "'taxa_aprovacao_ef_8_ano',\n", + "'taxa_aprovacao_ef_9_ano',\n", + "'taxa_aprovacao_em',\n", + "'taxa_aprovacao_em_1_ano',\n", + "'taxa_aprovacao_em_2_ano',\n", + "'taxa_aprovacao_em_3_ano',\n", + "'taxa_aprovacao_em_4_ano',\n", + "'taxa_aprovacao_em_nao_seriado',\n", + "'taxa_reprovacao_ef',\n", + "'taxa_reprovacao_ef_anos_iniciais',\n", + "'taxa_reprovacao_ef_anos_finais',\n", + "'taxa_reprovacao_ef_1_ano',\n", + "'taxa_reprovacao_ef_2_ano',\n", + "'taxa_reprovacao_ef_3_ano',\n", + "'taxa_reprovacao_ef_4_ano',\n", + "'taxa_reprovacao_ef_5_ano',\n", + "'taxa_reprovacao_ef_6_ano',\n", + "'taxa_reprovacao_ef_7_ano',\n", + "'taxa_reprovacao_ef_8_ano',\n", + "'taxa_reprovacao_ef_9_ano',\n", + "'taxa_reprovacao_em',\n", + "'taxa_reprovacao_em_1_ano',\n", + "'taxa_reprovacao_em_2_ano',\n", + "'taxa_reprovacao_em_3_ano',\n", + "'taxa_reprovacao_em_4_ano',\n", + "'taxa_reprovacao_em_nao_seriado',\n", + "'taxa_abandono_ef',\n", + "'taxa_abandono_ef_anos_iniciais',\n", + "'taxa_abandono_ef_anos_finais',\n", + "'taxa_abandono_ef_1_ano',\n", + "'taxa_abandono_ef_2_ano',\n", + "'taxa_abandono_ef_3_ano',\n", + "'taxa_abandono_ef_4_ano',\n", + "'taxa_abandono_ef_5_ano',\n", + "'taxa_abandono_ef_6_ano',\n", + "'taxa_abandono_ef_7_ano',\n", + "'taxa_abandono_ef_8_ano',\n", + "'taxa_abandono_ef_9_ano',\n", + "'taxa_abandono_em',\n", + "'taxa_abandono_em_1_ano',\n", + "'taxa_abandono_em_2_ano',\n", + "'taxa_abandono_em_3_ano',\n", + "'taxa_abandono_em_4_ano',\n", + "'taxa_abandono_em_nao_seriado',\n", + "'tnr_ef',\n", + "'tnr_ef_anos_iniciais',\n", + "'tnr_ef_anos_finais',\n", + "'tnr_ef_1_ano',\n", + "'tnr_ef_2_ano',\n", + "'tnr_ef_3_ano',\n", + "'tnr_ef_4_ano',\n", + "'tnr_ef_5_ano',\n", + "'tnr_ef_6_ano',\n", + "'tnr_ef_7_ano',\n", + "'tnr_ef_8_ano',\n", + "'tnr_ef_9_ano',\n", + 
"'tnr_em',\n", + "'tnr_em_1_ano',\n", + "'tnr_em_2_ano',\n", + "'tnr_em_3_ano',\n", + "'tnr_em_4_ano',\n", + "'tnr_em_nao_seriado',\n", + "'dsu_ei',\n", + "'dsu_ei_creche',\n", + "'dsu_ei_pre_escola',\n", + "'dsu_ef',\n", + "'dsu_ef_anos_iniciais',\n", + "'dsu_ef_anos_finais',\n", + "'dsu_em',\n", + "'dsu_ep',\n", + "'dsu_eja',\n", + "'dsu_ee',\n", + "'afd_ei_grupo_1',\n", + "'afd_ei_grupo_2',\n", + "'afd_ei_grupo_3',\n", + "'afd_ei_grupo_4',\n", + "'afd_ei_grupo_5',\n", + "'afd_ef_grupo_1',\n", + "'afd_ef_grupo_2',\n", + "'afd_ef_grupo_3',\n", + "'afd_ef_grupo_4',\n", + "'afd_ef_grupo_5',\n", + "'afd_ef_anos_iniciais_grupo_1',\n", + "'afd_ef_anos_iniciais_grupo_2',\n", + "'afd_ef_anos_iniciais_grupo_3',\n", + "'afd_ef_anos_iniciais_grupo_4',\n", + "'afd_ef_anos_iniciais_grupo_5',\n", + "'afd_ef_anos_finais_grupo_1',\n", + "'afd_ef_anos_finais_grupo_2',\n", + "'afd_ef_anos_finais_grupo_3',\n", + "'afd_ef_anos_finais_grupo_4',\n", + "'afd_ef_anos_finais_grupo_5',\n", + "'afd_em_grupo_1',\n", + "'afd_em_grupo_2',\n", + "'afd_em_grupo_3',\n", + "'afd_em_grupo_4',\n", + "'afd_em_grupo_5',\n", + "'afd_eja_fundamental_grupo_1',\n", + "'afd_eja_fundamental_grupo_2',\n", + "'afd_eja_fundamental_grupo_3',\n", + "'afd_eja_fundamental_grupo_4',\n", + "'afd_eja_fundamental_grupo_5',\n", + "'afd_eja_medio_grupo_1',\n", + "'afd_eja_medio_grupo_2',\n", + "'afd_eja_medio_grupo_3',\n", + "'afd_eja_medio_grupo_4',\n", + "'afd_eja_medio_grupo_5',\n", + "'ird_baixa_regularidade',\n", + "'ird_media_baixa',\n", + "'ird_media_alta',\n", + "'ird_alta',\n", + "'ied_ef_nivel_1',\n", + "'ied_ef_nivel_2',\n", + "'ied_ef_nivel_3',\n", + "'ied_ef_nivel_4',\n", + "'ied_ef_nivel_5',\n", + "'ied_ef_nivel_6',\n", + "'ied_ef_anos_iniciais_nivel_1',\n", + "'ied_ef_anos_iniciais_nivel_2',\n", + "'ied_ef_anos_iniciais_nivel_3',\n", + "'ied_ef_anos_iniciais_nivel_4',\n", + "'ied_ef_anos_iniciais_nivel_5',\n", + "'ied_ef_anos_iniciais_nivel_6',\n", + "'ied_ef_anos_finais_nivel_1',\n", + 
"'ied_ef_anos_finais_nivel_2',\n", + "'ied_ef_anos_finais_nivel_3',\n", + "'ied_ef_anos_finais_nivel_4',\n", + "'ied_ef_anos_finais_nivel_5',\n", + "'ied_ef_anos_finais_nivel_6',\n", + "'ied_em_nivel_1',\n", + "'ied_em_nivel_2',\n", + "'ied_em_nivel_3',\n", + "'ied_em_nivel_4',\n", + "'ied_em_nivel_5',\n", + "'ied_em_nivel_6',\n", + "'icg_nivel_1',\n", + "'icg_nivel_2',\n", + "'icg_nivel_3',\n", + "'icg_nivel_4',\n", + "'icg_nivel_5',\n", + "'icg_nivel_6']" + ], + "metadata": { + "id": "JJrhDnn8UaJ-" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#2021\n", + "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 
'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('AFD_2021_MUNICIPIOS/AFD_MUNICIPIOS_2021.xlsx') as f:\n", + " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd.drop(range(67126, 67132), inplace=True)\n", + " afd = afd.replace('--', '')\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd[index] = afd[index].astype(str)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", + " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('ATU_2021_MUNICIPIOS/ATU_MUNICIPIOS_2021.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu.drop(range(66464, 66467), inplace=True)\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, 
inplace=True)\n", + " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('DSU_2021_MUNICIPIOS/DSU_MUNICIPIOS_2021.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu.drop(range(67294, 67296), inplace=True)\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_01':'had_em_nao_seriado'}\n", + "\n", + "with 
ZipFile('/content/HAD_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('HAD_2021_MUNICIPIOS/HAD_MUNICIPIOS_2021.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had.drop(range(65548, 65551), inplace=True)\n", + " had = had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", + " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", + "\n", + "with ZipFile('/content/ICG_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('ICG_2021_MUNICIPIOS/ICG_MUNICIPIOS_2021.xlsx') as f:\n", + " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", + " icg.drop(range(67302, 67307), inplace=True)\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 
'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('IED_2021_MUNICIPIOS/IED_MUNICIPIOS_2021.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied.drop(range(66286, 66291), inplace=True)\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", + " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", + "\n", + "with ZipFile('/content/IRD_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('IRD_2021_MUNICIPIOS/IRD_MUNICIPIOS_2021.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", + " ird.drop(range(66876, 66881), inplace=True)\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 
'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2021_MUNICIPIOS.zip') as z:\n", + " with z.open('TDI_2021_MUNICIPIOS/TDI_MUNICIPIOS_2021.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi.drop(range(65538, 65540), inplace=True)\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + " '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + " '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + " '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + " '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + " '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + " with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + " tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tnr.drop(range(65574, 65577), 
inplace=True)\n", + " tnr = tnr.replace('--', '')\n", + " tnr.rename(columns=rename_tnr, inplace=True)\n", + " tnr[index] = tnr[index].astype(str)\n", + " tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + " tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + " '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + " '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + " '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + " '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + " '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + " '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + " '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + " '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + " '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + " '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + " '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + " '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', 
'2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + " '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + " '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + " '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + " '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + " '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + " '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + " with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + " tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tx.drop(range(65574, 65576), inplace=True)\n", + " tx = tx.replace('--', '')\n", + " tx.rename(columns=rename_tx, inplace=True)\n", + " tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + " tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', 
left_on=index, right_on=index)\n", + "df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tx, how='outer', left_on=index, right_on=index)\n", + "df = df[ordem]\n", + "df.to_csv('/content/municipio.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "zDqi1IAvqvp2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#2022\n", + "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 
'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('AFD_2022_MUNICIPIOS/AFD_MUNICIPIOS_2022.xlsx') as f:\n", + " afd = pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd.drop(range(67118, 67123), inplace=True)\n", + " afd = afd.replace('--', '')\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", + " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('ATU_2022_MUNICIPIOS/ATU_MUNICIPIOS_2022.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu.drop(range(66515, 66518), inplace=True)\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, inplace=True)\n", + " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 
'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('DSU_2022_MUNICIPIOS/DSU_MUNICIPIOS_2022.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu.drop(range(67301, 67303), inplace=True)\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_01':'had_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/HAD_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('HAD_2022_MUNICIPIOS/HAD_MUNICIPIOS_2022.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had.drop(range(66509, 66512), inplace=True)\n", + " had = 
had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'icg_nivel_1', 'EDU_BAS_CAT_2':'icg_nivel_2', 'EDU_BAS_CAT_3':'icg_nivel_3',\n", + " 'EDU_BAS_CAT_4':'icg_nivel_4', 'EDU_BAS_CAT_5':'icg_nivel_5', 'EDU_BAS_CAT_6':'icg_nivel_6'}\n", + "\n", + "with ZipFile('/content/ICG_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('ICG_2022_MUNICIPIOS/ICG_MUNICIPIOS_2022.xlsx') as f:\n", + " icg = pd.read_excel(f, skiprows=8, dtype=str)\n", + " icg.drop(range(67303, 67308), inplace=True)\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 
'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('IED_2022_MUNICIPIOS/IED_MUNICIPIOS_2022.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied.drop(range(66266, 66271), inplace=True)\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_1':'ird_baixa_regularidade', 'EDU_BAS_CAT_2':'ird_media_baixa', \n", + " 'EDU_BAS_CAT_3':'ird_media_alta', 'EDU_BAS_CAT_4':'ird_alta'}\n", + "\n", + "with ZipFile('/content/IRD_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('IRD_2022_MUNICIPIOS/IRD_MUNICIPIOS_2022.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=9, dtype=str)\n", + " ird.drop(range(66845, 66850), inplace=True)\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 
'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2022_MUNICIPIOS.zip') as z:\n", + " with z.open('TDI_2022_MUNICIPIOS/TDI_MUNICIPIOS_2022.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi.drop(range(65548, 65550), inplace=True)\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "# rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tnr.drop(range(65574, 65577), inplace=True)\n", + "# tnr = tnr.replace('--', '')\n", + "# tnr.rename(columns=rename_tnr, inplace=True)\n", + "# tnr[index] = tnr[index].astype(str)\n", + "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + "# 
tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', 
'3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tx.drop(range(65574, 65576), inplace=True)\n", + "# tx = tx.replace('--', '')\n", + "# tx.rename(columns=rename_tx, inplace=True)\n", + "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tx, how='outer', left_on=index, 
right_on=index)\n", + "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", + "df_list = df.columns\n", + "df2 = list(set(ordem) - set(df_list))\n", + "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", + "#ORDENA TODAS AS VARIÁVEIS \n", + "df = df[ordem]\n", + "df.to_csv('/content/municipio_2022.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "b0qftgG48U4F" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Escola" + ], + "metadata": { + "id": "E-U-Wg62dkVH" + } + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_ESCOLAS.zip\n", + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_ESCOLAS.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "FdOawGS8rhvd", + "outputId": "40e2261d-ddda-431b-e77b-d889172d8e88" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - 
"source": [ - "rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', \n", - " 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", - " '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", - " '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", - " '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", - " '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", - " '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", - " '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", - " '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", - " '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", - " '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", - " '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", - " '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", - " '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", - " '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", - " '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', 
'3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", - " '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", - " '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", - " '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", - " '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/tx_rend_escolas_2021.zip') as z:\n", - " with z.open('tx_rend_escolas_2021/tx_rend_escolas_2021.xlsx') as f:\n", - " tx = pd.read_excel(f, skiprows=8)\n", - " tx.drop([130129, 130130], inplace=True)\n", - " tx.drop(['NO_REGIAO', 'SG_UF', 'NO_MUNICIPIO', 'NO_ENTIDADE', 'Unnamed: 63', 'Unnamed: 64',\n", - " 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68',\n", - " 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72',\n", - " 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76',\n", - " 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80',\n", - " 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83'], axis=1, inplace=True)\n", - " tx.rename(columns=rename_tx, inplace=True)\n", - " tx['id_municipio'] = tx['id_municipio'].astype(int)\n", - " tx['id_escola'] = tx['id_escola'].astype(int)\n", - " tx[index] = tx[index].astype(str)\n", - " tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", - " tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", - " tx = tx.replace('--', '')\n", - " df = pd.merge(df_escola, tx, how='left', left_on=index, right_on=index)\n", - " #df = df[ordem]\n", - " #df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "X9795OZ3VXoO" - }, - "execution_count": null, - "outputs": [] + "output_type": 
"stream", + "name": "stdout", + "text": [ + "--2023-02-11 18:24:21-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/AFD_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 59511457 (57M) [application/zip]\n", + "Saving to: ‘AFD_2022_ESCOLAS.zip.1’\n", + "\n", + "AFD_2022_ESCOLAS.zi 100%[===================>] 56.75M 293KB/s in 3m 10s \n", + "\n", + "2023-02-11 18:27:34 (306 KB/s) - ‘AFD_2022_ESCOLAS.zip.1’ saved [59511457/59511457]\n", + "\n", + "--2023-02-11 18:27:34-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ICG_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 18095429 (17M) [application/zip]\n", + "Saving to: ‘ICG_2022_ESCOLAS.zip’\n", + "\n", + "ICG_2022_ESCOLAS.zi 100%[===================>] 17.26M 283KB/s in 66s \n", + "\n", + "2023-02-11 18:28:41 (269 KB/s) - ‘ICG_2022_ESCOLAS.zip’ saved [18095429/18095429]\n", + "\n", + "--2023-02-11 18:28:41-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IED_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 
200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 39032819 (37M) [application/zip]\n", + "Saving to: ‘IED_2022_ESCOLAS.zip’\n", + "\n", + "IED_2022_ESCOLAS.zi 100%[===================>] 37.22M 357KB/s in 2m 7s \n", + "\n", + "2023-02-11 18:30:50 (301 KB/s) - ‘IED_2022_ESCOLAS.zip’ saved [39032819/39032819]\n", + "\n", + "--2023-02-11 18:30:50-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/ATU_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 51091323 (49M) [application/zip]\n", + "Saving to: ‘ATU_2022_ESCOLAS.zip’\n", + "\n", + "ATU_2022_ESCOLAS.zi 100%[===================>] 48.72M 364KB/s in 3m 35s \n", + "\n", + "2023-02-11 18:34:27 (232 KB/s) - ‘ATU_2022_ESCOLAS.zip’ saved [51091323/51091323]\n", + "\n", + "--2023-02-11 18:34:27-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/HAD_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... 
connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 41905778 (40M) [application/zip]\n", + "Saving to: ‘HAD_2022_ESCOLAS.zip’\n", + "\n", + "HAD_2022_ESCOLAS.zi 100%[===================>] 39.96M 369KB/s in 2m 24s \n", + "\n", + "2023-02-11 18:36:54 (283 KB/s) - ‘HAD_2022_ESCOLAS.zip’ saved [41905778/41905778]\n", + "\n", + "--2023-02-11 18:36:54-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/DSU_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 32236510 (31M) [application/zip]\n", + "Saving to: ‘DSU_2022_ESCOLAS.zip’\n", + "\n", + "DSU_2022_ESCOLAS.zi 100%[===================>] 30.74M 252KB/s in 1m 57s \n", + "\n", + "2023-02-11 18:38:52 (270 KB/s) - ‘DSU_2022_ESCOLAS.zip’ saved [32236510/32236510]\n", + "\n", + "--2023-02-11 18:38:52-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/IRD_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 20263711 (19M) [application/zip]\n", + "Saving to: ‘IRD_2022_ESCOLAS.zip’\n", + "\n", + "IRD_2022_ESCOLAS.zi 100%[===================>] 19.32M 362KB/s in 59s \n", + "\n", + "2023-02-11 18:39:54 (333 KB/s) - ‘IRD_2022_ESCOLAS.zip’ saved [20263711/20263711]\n", + "\n", + "--2023-02-11 18:39:54-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2022/TDI_2022_ESCOLAS.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 35400577 (34M) [application/zip]\n", + "Saving to: ‘TDI_2022_ESCOLAS.zip’\n", + "\n", + "TDI_2022_ESCOLAS.zi 100%[===================>] 33.76M 334KB/s in 1m 59s \n", + "\n", + "2023-02-11 18:41:54 (291 KB/s) - ‘TDI_2022_ESCOLAS.zip’ saved [35400577/35400577]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "ordem = ['id_municipio', 'id_escola', 'localizacao', 'rede', \n", + " 'atu_ei', 'atu_ei_creche', 'atu_ei_pre_escola', 'atu_ef', 'atu_ef_anos_iniciais',\n", + " 'atu_ef_anos_finais', 'atu_ef_1_ano', 'atu_ef_2_ano', 'atu_ef_3_ano', 'atu_ef_4_ano', 'atu_ef_5_ano',\n", + " 'atu_ef_6_ano', 'atu_ef_7_ano', 'atu_ef_8_ano', 'atu_ef_9_ano', 'atu_ef_turmas_unif_multi_fluxo',\n", + " 'atu_em', 'atu_em_1_ano', 'atu_em_2_ano', 'atu_em_3_ano', 'atu_em_4_ano', 'atu_em_nao_seriado', 'had_ei',\n", + " 'had_ei_creche', 'had_ei_pre_escola', 'had_ef', 'had_ef_anos_iniciais', 'had_ef_anos_finais', 'had_ef_1_ano',\n", + " 'had_ef_2_ano', 'had_ef_3_ano', 'had_ef_4_ano', 'had_ef_5_ano', 'had_ef_6_ano', 'had_ef_7_ano',\n", + " 'had_ef_8_ano', 'had_ef_9_ano', 'had_em', 'had_em_1_ano', 'had_em_2_ano', 
'had_em_3_ano', 'had_em_4_ano',\n", + " 'had_em_nao_seriado', 'tdi_ef', 'tdi_ef_anos_iniciais', 'tdi_ef_anos_finais', 'tdi_ef_1_ano', 'tdi_ef_2_ano',\n", + " 'tdi_ef_3_ano', 'tdi_ef_4_ano', 'tdi_ef_5_ano', 'tdi_ef_6_ano', 'tdi_ef_7_ano', 'tdi_ef_8_ano', 'tdi_ef_9_ano',\n", + " 'tdi_em', 'tdi_em_1_ano', 'tdi_em_2_ano', 'tdi_em_3_ano', 'tdi_em_4_ano', 'taxa_aprovacao_ef', \n", + " 'taxa_aprovacao_ef_anos_iniciais', 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano', 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano', 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano',\n", + " 'taxa_aprovacao_em', 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano', 'taxa_aprovacao_em_3_ano',\n", + " 'taxa_aprovacao_em_4_ano', 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef', 'taxa_reprovacao_ef_anos_iniciais',\n", + " 'taxa_reprovacao_ef_anos_finais', 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano', 'taxa_reprovacao_ef_3_ano',\n", + " 'taxa_reprovacao_ef_4_ano', 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano', 'taxa_reprovacao_ef_7_ano',\n", + " 'taxa_reprovacao_ef_8_ano' ,'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em', 'taxa_reprovacao_em_1_ano',\n", + " 'taxa_reprovacao_em_2_ano' ,'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano' ,'taxa_reprovacao_em_nao_seriado',\n", + " 'taxa_abandono_ef' ,'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais', 'taxa_abandono_ef_1_ano',\n", + " 'taxa_abandono_ef_2_ano' ,'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano', 'taxa_abandono_ef_5_ano',\n", + " 'taxa_abandono_ef_6_ano', 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano' ,'taxa_abandono_ef_9_ano', \n", + " 'taxa_abandono_em', 'taxa_abandono_em_1_ano' ,'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano', \n", + " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado', 'tnr_ef', 'tnr_ef_anos_iniciais', \n", + " 
'tnr_ef_anos_finais', 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano', 'tnr_ef_5_ano', \n", + " 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano', 'tnr_ef_9_ano', 'tnr_em' ,'tnr_em_1_ano', 'tnr_em_2_ano',\n", + " 'tnr_em_3_ano', 'tnr_em_4_ano', 'tnr_em_nao_seriado', 'dsu_ei', 'dsu_ei_creche', 'dsu_ei_pre_escola', \n", + " 'dsu_ef', 'dsu_ef_anos_iniciais', 'dsu_ef_anos_finais' ,'dsu_em', 'dsu_ep', 'dsu_eja', 'dsu_ee',\n", + " 'afd_ei_grupo_1', 'afd_ei_grupo_2', 'afd_ei_grupo_3', 'afd_ei_grupo_4', 'afd_ei_grupo_5', 'afd_ef_grupo_1',\n", + " 'afd_ef_grupo_2', 'afd_ef_grupo_3', 'afd_ef_grupo_4', 'afd_ef_grupo_5', 'afd_ef_anos_iniciais_grupo_1',\n", + " 'afd_ef_anos_iniciais_grupo_2', 'afd_ef_anos_iniciais_grupo_3', 'afd_ef_anos_iniciais_grupo_4',\n", + " 'afd_ef_anos_iniciais_grupo_5', 'afd_ef_anos_finais_grupo_1', 'afd_ef_anos_finais_grupo_2',\n", + " 'afd_ef_anos_finais_grupo_3', 'afd_ef_anos_finais_grupo_4', 'afd_ef_anos_finais_grupo_5', 'afd_em_grupo_1', \n", + " 'afd_em_grupo_2', 'afd_em_grupo_3', 'afd_em_grupo_4', 'afd_em_grupo_5', 'afd_eja_fundamental_grupo_1', \n", + " 'afd_eja_fundamental_grupo_2', 'afd_eja_fundamental_grupo_3' ,'afd_eja_fundamental_grupo_4',\n", + " 'afd_eja_fundamental_grupo_5' ,'afd_eja_medio_grupo_1', 'afd_eja_medio_grupo_2', 'afd_eja_medio_grupo_3',\n", + " 'afd_eja_medio_grupo_4', 'afd_eja_medio_grupo_5' ,'ird_media_regularidade_docente', 'ied_ef_nivel_1', \n", + " 'ied_ef_nivel_2', 'ied_ef_nivel_3', 'ied_ef_nivel_4', \n", + " 'ied_ef_nivel_5', 'ied_ef_nivel_6', 'ied_ef_anos_iniciais_nivel_1', 'ied_ef_anos_iniciais_nivel_2',\n", + " 'ied_ef_anos_iniciais_nivel_3', 'ied_ef_anos_iniciais_nivel_4', 'ied_ef_anos_iniciais_nivel_5',\n", + " 'ied_ef_anos_iniciais_nivel_6', 'ied_ef_anos_finais_nivel_1', 'ied_ef_anos_finais_nivel_2',\n", + " 'ied_ef_anos_finais_nivel_3', 'ied_ef_anos_finais_nivel_4', 'ied_ef_anos_finais_nivel_5',\n", + " 'ied_ef_anos_finais_nivel_6', 'ied_em_nivel_1' ,'ied_em_nivel_2', 'ied_em_nivel_3', 
'ied_em_nivel_4',\n", + " 'ied_em_nivel_5', 'ied_em_nivel_6', 'icg_nivel_complexidade_gestao_escola']" + ], + "metadata": { + "id": "pixctEMyvoaH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#2022\n", + "rename_afd = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', 'ED_INF_CAT_1':'afd_ei_grupo_1', 'ED_INF_CAT_2':'afd_ei_grupo_2',\n", + " 'ED_INF_CAT_3':'afd_ei_grupo_3', 'ED_INF_CAT_4':'afd_ei_grupo_4', 'ED_INF_CAT_5':'afd_ei_grupo_5', 'FUN_CAT_1':'afd_ef_grupo_1', \n", + " 'FUN_CAT_2':'afd_ef_grupo_2', 'FUN_CAT_3':'afd_ef_grupo_3', 'FUN_CAT_4':'afd_ef_grupo_4', 'FUN_CAT_5':'afd_ef_grupo_5', 'FUN_AI_CAT_1':'afd_ef_anos_iniciais_grupo_1',\n", + " 'FUN_AI_CAT_2':'afd_ef_anos_iniciais_grupo_2', 'FUN_AI_CAT_3':'afd_ef_anos_iniciais_grupo_3', 'FUN_AI_CAT_4':'afd_ef_anos_iniciais_grupo_4',\n", + " 'FUN_AI_CAT_5':'afd_ef_anos_iniciais_grupo_5', 'FUN_AF_CAT_1':'afd_ef_anos_finais_grupo_1', 'FUN_AF_CAT_2':'afd_ef_anos_finais_grupo_2',\n", + " 'FUN_AF_CAT_3':'afd_ef_anos_finais_grupo_3', 'FUN_AF_CAT_4':'afd_ef_anos_finais_grupo_4', 'FUN_AF_CAT_5':'afd_ef_anos_finais_grupo_5',\n", + " 'MED_CAT_1':'afd_em_grupo_1', 'MED_CAT_2':'afd_em_grupo_2', 'MED_CAT_3':'afd_em_grupo_3', 'MED_CAT_4':'afd_em_grupo_4', 'MED_CAT_5':'afd_em_grupo_5',\n", + " 'EJA_FUN_CAT_1':'afd_eja_fundamental_grupo_1', 'EJA_FUN_CAT_2':'afd_eja_fundamental_grupo_2', 'EJA_FUN_CAT_3':'afd_eja_fundamental_grupo_3',\n", + " 'EJA_FUN_CAT_4':'afd_eja_fundamental_grupo_4', 'EJA_FUN_CAT_5':'afd_eja_fundamental_grupo_5', 'EJA_MED_CAT_1':'afd_eja_medio_grupo_1',\n", + " 'EJA_MED_CAT_2':'afd_eja_medio_grupo_2', 'EJA_MED_CAT_3':'afd_eja_medio_grupo_3', 'EJA_MED_CAT_4':'afd_eja_medio_grupo_4', \n", + " 'EJA_MED_CAT_5':'afd_eja_medio_grupo_5'}\n", + "\n", + "with ZipFile('/content/AFD_2022_ESCOLAS.zip') as z:\n", + " with z.open('AFD_2022_ESCOLAS/AFD_ESCOLAS_2022.xlsx') as f:\n", + " afd 
= pd.read_excel(f, skiprows=10, dtype=str)\n", + " afd.drop(range(175523, 175528), inplace=True)\n", + " afd = afd.replace('--', '')\n", + " afd.rename(columns=rename_afd, inplace=True)\n", + " afd['localizacao'] = afd['localizacao'].str.lower().replace('pública', 'publica')\n", + " afd['rede'] = afd['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_atu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'atu_ei', 'CRE_CAT_0':'atu_ei_creche', 'PRE_CAT_0':'atu_ei_pre_escola', 'FUN_CAT_0':'atu_ef',\n", + " 'FUN_AI_CAT_0':'atu_ef_anos_iniciais', 'FUN_AF_CAT_0':'atu_ef_anos_finais', 'FUN_01_CAT_0':'atu_ef_1_ano',\n", + " 'FUN_02_CAT_0':'atu_ef_2_ano', 'FUN_03_CAT_0':'atu_ef_3_ano', 'FUN_04_CAT_0':'atu_ef_4_ano',\n", + " 'FUN_05_CAT_0':'atu_ef_5_ano', 'FUN_06_CAT_0':'atu_ef_6_ano', 'FUN_07_CAT_0':'atu_ef_7_ano', \n", + " 'FUN_08_CAT_0':'atu_ef_8_ano', 'FUN_09_CAT_0':'atu_ef_9_ano', 'MULT_ETA_CAT_0':'atu_ef_turmas_unif_multi_fluxo',\n", + " 'MED_CAT_0':'atu_em', 'MED_01_CAT_0':'atu_em_1_ano', 'MED_02_CAT_0':'atu_em_2_ano', 'MED_03_CAT_0':'atu_em_3_ano',\n", + " 'MED_04_CAT_0':'atu_em_4_ano', 'MED_NS_CAT_0':'atu_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/ATU_2022_ESCOLAS.zip') as z:\n", + " with z.open('ATU_2022_ESCOLAS/ATU_ESCOLAS_2022.xlsx') as f:\n", + " atu = pd.read_excel(f, skiprows=8, dtype=str)\n", + " atu.drop(range(174319, 174322), inplace=True)\n", + " atu = atu.replace('--', '')\n", + " atu.rename(columns=rename_atu, inplace=True)\n", + " atu['localizacao'] = atu['localizacao'].str.lower().replace('pública', 'publica')\n", + " atu['rede'] = atu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_dsu = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'dsu_ei', 'CRE_CAT_0':'dsu_ei_creche', 
'PRE_CAT_0':'dsu_ei_pre_escola', 'FUN_CAT_0':'dsu_ef',\n", + " 'FUN_AI_CAT_0':'dsu_ef_anos_iniciais', 'FUN_AF_CAT_0':'dsu_ef_anos_finais', 'MED_CAT_0':'dsu_em',\n", + " 'PROF_CAT_0':'dsu_ep', 'EJA_CAT_0':'dsu_eja', 'EDU_BAS_CAT_0':'dsu_ee'}\n", + "\n", + "with ZipFile('/content/DSU_2022_ESCOLAS.zip') as z:\n", + " with z.open('DSU_2022_ESCOLAS/DSU_ESCOLAS_2022.xlsx') as f:\n", + " dsu = pd.read_excel(f, skiprows=9, dtype=str)\n", + " dsu.drop(range(178111, 178113), inplace=True)\n", + " dsu = dsu.replace('--', '')\n", + " dsu.rename(columns=rename_dsu, inplace=True)\n", + " dsu['localizacao'] = dsu['localizacao'].str.lower().replace('pública', 'publica')\n", + " dsu['rede'] = dsu['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_had = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'ED_INF_CAT_0':'had_ei', 'CRE_CAT_0':'had_ei_creche', 'PRE_CAT_0':'had_ei_pre_escola',\n", + " 'FUN_CAT_0':'had_ef', 'FUN_AI_CAT_0':'had_ef_anos_iniciais', 'FUN_AF_CAT_0':'had_ef_anos_finais', 'FUN_01_CAT_0':'had_ef_1_ano',\n", + " 'FUN_02_CAT_0':'had_ef_2_ano', 'FUN_03_CAT_0':'had_ef_3_ano', 'FUN_04_CAT_0':'had_ef_4_ano', 'FUN_05_CAT_0':'had_ef_5_ano',\n", + " 'FUN_06_CAT_0':'had_ef_6_ano', 'FUN_07_CAT_0':'had_ef_7_ano', 'FUN_08_CAT_0':'had_ef_8_ano', 'FUN_09_CAT_0':'had_ef_9_ano',\n", + " 'MED_CAT_0':'had_em', 'MED_01_CAT_0':'had_em_1_ano', 'MED_02_CAT_0':'had_em_2_ano', 'MED_03_CAT_0':'had_em_3_ano',\n", + " 'MED_04_CAT_0':'had_em_4_ano', 'MED_NS_CAT_0':'had_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/HAD_2022_ESCOLAS.zip') as z:\n", + " with z.open('HAD_2022_ESCOLAS/HAD_ESCOLAS_2022.xlsx') as f:\n", + " had = pd.read_excel(f, skiprows=8, dtype=str)\n", + " had.drop(range(173795, 173798), inplace=True)\n", + " had = had.replace('--', '')\n", + " had.rename(columns=rename_had, inplace=True)\n", + " had['localizacao'] = 
had['localizacao'].str.lower().replace('pública', 'publica')\n", + " had['rede'] = had['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_icg = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'COMPLEX':'icg_nivel_complexidade_gestao_escola'}\n", + "\n", + "with ZipFile('/content/ICG_2022_ESCOLAS.zip') as z:\n", + " with z.open('ICG_2022_ESCOLAS/ICG_ESCOLAS_2022.xlsx') as f:\n", + " icg = pd.read_excel(f, skiprows=10, dtype=str)\n", + " icg.drop(range(178346, 178351), inplace=True)\n", + " icg = icg.replace('--', '')\n", + " icg.rename(columns=rename_icg, inplace=True)\n", + " icg['localizacao'] = icg['localizacao'].str.lower().replace('pública', 'publica')\n", + " icg['rede'] = icg['rede'].str.lower().replace('pública', 'publica')\n", + " icg['icg_nivel_complexidade_gestao_escola'] = icg['icg_nivel_complexidade_gestao_escola'].str.lower()\n", + " icg['icg_nivel_complexidade_gestao_escola']= icg['icg_nivel_complexidade_gestao_escola'].apply(lambda x: str(x).replace('Nível', 'nivel'))\n", + "\n", + "rename_ied = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_1':'ied_ef_nivel_1', 'FUN_CAT_2':'ied_ef_nivel_2', 'FUN_CAT_3':'ied_ef_nivel_3',\n", + " 'FUN_CAT_4':'ied_ef_nivel_4', 'FUN_CAT_5':'ied_ef_nivel_5', 'FUN_CAT_6':'ied_ef_nivel_6',\n", + " 'FUN_AI_CAT_1':'ied_ef_anos_iniciais_nivel_1', 'FUN_AI_CAT_2':'ied_ef_anos_iniciais_nivel_2',\n", + " 'FUN_AI_CAT_3':'ied_ef_anos_iniciais_nivel_3', 'FUN_AI_CAT_4':'ied_ef_anos_iniciais_nivel_4',\n", + " 'FUN_AI_CAT_5':'ied_ef_anos_iniciais_nivel_5', 'FUN_AI_CAT_6':'ied_ef_anos_iniciais_nivel_6',\n", + " 'FUN_AF_CAT_1':'ied_ef_anos_finais_nivel_1', 'FUN_AF_CAT_2':'ied_ef_anos_finais_nivel_2',\n", + " 'FUN_AF_CAT_3':'ied_ef_anos_finais_nivel_3', 'FUN_AF_CAT_4':'ied_ef_anos_finais_nivel_4',\n", + " 
'FUN_AF_CAT_5':'ied_ef_anos_finais_nivel_5', 'FUN_AF_CAT_6':'ied_ef_anos_finais_nivel_6',\n", + " 'MED_CAT_1':'ied_em_nivel_1', 'MED_CAT_2':'ied_em_nivel_2', 'MED_CAT_3':'ied_em_nivel_3',\n", + " 'MED_CAT_4':'ied_em_nivel_4', 'MED_CAT_5':'ied_em_nivel_5', 'MED_CAT_6':'ied_em_nivel_6'}\n", + "\n", + "with ZipFile('/content/IED_2022_ESCOLAS.zip') as z:\n", + " with z.open('IED_2022_ESCOLAS/IED_ESCOLAS_2022.xlsx') as f:\n", + " ied = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ied.drop(range(130907, 130912), inplace=True)\n", + " ied = ied.replace('--', '')\n", + " ied.rename(columns=rename_ied, inplace=True)\n", + " ied['localizacao'] = ied['localizacao'].str.lower().replace('pública', 'publica')\n", + " ied['rede'] = ied['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_ird = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'EDU_BAS_CAT_0':'ird_media_regularidade_docente'}\n", + "\n", + "with ZipFile('/content/IRD_2022_ESCOLAS.zip') as z:\n", + " with z.open('IRD_2022_ESCOLAS/IRD_ESCOLAS_2022.xlsx') as f:\n", + " ird = pd.read_excel(f, skiprows=10, dtype=str)\n", + " ird.drop(range(165721, 165726), inplace=True)\n", + " ird = ird.replace('--', '')\n", + " ird.rename(columns=rename_ird, inplace=True)\n", + " ird['localizacao'] = ird['localizacao'].str.lower().replace('pública', 'publica')\n", + " ird['rede'] = ird['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "rename_tdi = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede',\n", + " 'FUN_CAT_0':'tdi_ef', 'FUN_AI_CAT_0':'tdi_ef_anos_iniciais', 'FUN_AF_CAT_0':'tdi_ef_anos_finais',\n", + " 'FUN_01_CAT_0':'tdi_ef_1_ano', 'FUN_02_CAT_0':'tdi_ef_2_ano', 'FUN_03_CAT_0':'tdi_ef_3_ano',\n", + " 'FUN_04_CAT_0':'tdi_ef_4_ano', 'FUN_05_CAT_0':'tdi_ef_5_ano', 'FUN_06_CAT_0':'tdi_ef_6_ano',\n", + " 
'FUN_07_CAT_0':'tdi_ef_7_ano', 'FUN_08_CAT_0':'tdi_ef_8_ano', 'FUN_09_CAT_0':'tdi_ef_9_ano',\n", + " 'MED_CAT_0':'tdi_em', 'MED_01_CAT_0':'tdi_em_1_ano', 'MED_02_CAT_0':'tdi_em_2_ano',\n", + " 'MED_03_CAT_0':'tdi_em_3_ano', 'MED_04_CAT_0':'tdi_em_4_ano'}\n", + "\n", + "with ZipFile('/content/TDI_2022_ESCOLAS.zip') as z:\n", + " with z.open('TDI_2022_ESCOLAS/TDI_ESCOLAS_2022.xlsx') as f:\n", + " tdi = pd.read_excel(f, skiprows=8, dtype=str)\n", + " tdi.drop(range(129174, 129176), inplace=True)\n", + " tdi = tdi.replace('--', '')\n", + " tdi.rename(columns=rename_tdi, inplace=True)\n", + " tdi['localizacao'] = tdi['localizacao'].str.lower().replace('pública', 'publica')\n", + " tdi['rede'] = tdi['rede'].str.lower().replace('pública', 'publica')\n", + "\n", + "# rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + "# '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + "# '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + "# '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + "# '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + "# '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tnr_municipios_2021.zip') as z:\n", + "# with z.open('tnr_municipios_2021/tnr_municipios_2021.xlsx') as f:\n", + "# tnr = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tnr.drop(range(65574, 65577), inplace=True)\n", + "# tnr = tnr.replace('--', '')\n", + "# tnr.rename(columns=rename_tnr, inplace=True)\n", + "# tnr[index] = tnr[index].astype(str)\n", + "# tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 
'publica')\n", + "# tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "\n", + "# rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + "# '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + "# '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + "# '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + "# '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + "# '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + "# '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + "# '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + "# '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + "# '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + "# '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + "# '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', '2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + "# '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + "# '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', 
'3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + "# '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + "# '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + "# '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + "# '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + "# '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "# with ZipFile('/content/tx_rend_municipios_2021.zip') as z:\n", + "# with z.open('tx_rend_municipios_2021/tx_rend_municipios_2021.xlsx') as f:\n", + "# tx = pd.read_excel(f, skiprows=8, dtype=str)\n", + "# tx.drop(range(65574, 65576), inplace=True)\n", + "# tx = tx.replace('--', '')\n", + "# tx.rename(columns=rename_tx, inplace=True)\n", + "# tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + "# tx['rede'] = tx['rede'].str.lower().replace('pública', 'publica')\n", + " \n", + "index = ['ano',\t'NO_REGIAO',\t'SG_UF',\t'id_municipio',\t'NO_MUNICIPIO',\t'localizacao',\t'rede', 'id_escola',\t'NO_ENTIDADE'] \n", + "df = pd.merge(afd, atu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, dsu, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, had, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, icg, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ied, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, ird, how='outer', left_on=index, right_on=index)\n", + "df = pd.merge(df, tdi, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tnr, how='outer', left_on=index, right_on=index)\n", + "# df = pd.merge(df, tx, 
how='outer', left_on=index, right_on=index)\n", + "#COMO ALGUNS DADOS NÃO FORAM ATUALIZADOS, CRIA-SE DF2 COMO COMPLEMENTAR PARA A ORDENAÇÃO DA BASE\n", + "df_list = df.columns\n", + "df2 = list(set(ordem) - set(df_list))\n", + "df = pd.concat([df, pd.DataFrame(columns=df2)])\n", + "#ORDENA TODAS AS VARIÁVEIS \n", + "df = df[ordem]\n", + "df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "oBnitg2gs1C6" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "query = ''' \n", + "SELECT * FROM `basedosdados-dev.br_inep_indicadores_educacionais.escola` WHERE ano = 2021\n", + "'''\n", + "\n", + "df_escola = bd.read_sql(query, billing_project_id='input-bd')" + ], + "metadata": { + "id": "Ue9hLyUZOxni" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df_escola = pd.read_csv('/content/staging_br_inep_indicadores_educacionais_escola_ano=2021_escola.csv')\n", + "\n", + "index = ['id_municipio', 'id_escola', 'localizacao', 'rede']\n", + "drop = ['taxa_aprovacao_ef', 'taxa_aprovacao_ef_anos_iniciais',\n", + " 'taxa_aprovacao_ef_anos_finais', 'taxa_aprovacao_ef_1_ano',\n", + " 'taxa_aprovacao_ef_2_ano', 'taxa_aprovacao_ef_3_ano',\n", + " 'taxa_aprovacao_ef_4_ano', 'taxa_aprovacao_ef_5_ano',\n", + " 'taxa_aprovacao_ef_6_ano', 'taxa_aprovacao_ef_7_ano',\n", + " 'taxa_aprovacao_ef_8_ano', 'taxa_aprovacao_ef_9_ano', 'taxa_aprovacao_em',\n", + " 'taxa_aprovacao_em_1_ano', 'taxa_aprovacao_em_2_ano',\n", + " 'taxa_aprovacao_em_3_ano', 'taxa_aprovacao_em_4_ano',\n", + " 'taxa_aprovacao_em_nao_seriado', 'taxa_reprovacao_ef',\n", + " 'taxa_reprovacao_ef_anos_iniciais', 'taxa_reprovacao_ef_anos_finais',\n", + " 'taxa_reprovacao_ef_1_ano', 'taxa_reprovacao_ef_2_ano',\n", + " 'taxa_reprovacao_ef_3_ano', 'taxa_reprovacao_ef_4_ano',\n", + " 'taxa_reprovacao_ef_5_ano', 'taxa_reprovacao_ef_6_ano',\n", + " 'taxa_reprovacao_ef_7_ano', 
'taxa_reprovacao_ef_8_ano',\n", + " 'taxa_reprovacao_ef_9_ano', 'taxa_reprovacao_em',\n", + " 'taxa_reprovacao_em_1_ano', 'taxa_reprovacao_em_2_ano',\n", + " 'taxa_reprovacao_em_3_ano', 'taxa_reprovacao_em_4_ano',\n", + " 'taxa_reprovacao_em_nao_seriado', 'taxa_abandono_ef',\n", + " 'taxa_abandono_ef_anos_iniciais', 'taxa_abandono_ef_anos_finais',\n", + " 'taxa_abandono_ef_1_ano', 'taxa_abandono_ef_2_ano',\n", + " 'taxa_abandono_ef_3_ano', 'taxa_abandono_ef_4_ano',\n", + " 'taxa_abandono_ef_5_ano', 'taxa_abandono_ef_6_ano',\n", + " 'taxa_abandono_ef_7_ano', 'taxa_abandono_ef_8_ano',\n", + " 'taxa_abandono_ef_9_ano', 'taxa_abandono_em', 'taxa_abandono_em_1_ano',\n", + " 'taxa_abandono_em_2_ano', 'taxa_abandono_em_3_ano',\n", + " 'taxa_abandono_em_4_ano', 'taxa_abandono_em_nao_seriado',\n", + " 'tnr_ef', 'tnr_ef_anos_iniciais', 'tnr_ef_anos_finais',\n", + " 'tnr_ef_1_ano', 'tnr_ef_2_ano', 'tnr_ef_3_ano', 'tnr_ef_4_ano',\n", + " 'tnr_ef_5_ano', 'tnr_ef_6_ano', 'tnr_ef_7_ano', 'tnr_ef_8_ano',\n", + " 'tnr_ef_9_ano', 'tnr_em', 'tnr_em_1_ano', 'tnr_em_2_ano', 'tnr_em_3_ano',\n", + " 'tnr_em_4_ano', 'tnr_em_nao_seriado']\n", + "df_escola.drop(drop, axis=1, inplace=True)\n", + "df_escola[index] = df_escola[index].astype(str)" + ], + "metadata": { + "id": "T3BnKespuGFu" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_escolas_2021.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "OVqdQPehVbal", + "outputId": "fd8bdc4c-6b6b-476d-f26c-f79e464ed32e" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_escolas_2021.zip" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xxAHI_d7ILXh", 
- "outputId": "bc8d46b0-cc4a-417b-e91c-e4116dd53fc4" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "--2022-09-14 09:39:16-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_escolas_2021.zip\n", - "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", - "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", - "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", - " Unable to locally verify the issuer's authority.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 32365800 (31M) [application/zip]\n", - "Saving to: ‘tnr_escolas_2021.zip’\n", - "\n", - "tnr_escolas_2021.zi 100%[===================>] 30.87M 251KB/s in 1m 49s \n", - "\n", - "2022-09-14 09:41:07 (290 KB/s) - ‘tnr_escolas_2021.zip’ saved [32365800/32365800]\n", - "\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-09-14 09:29:58-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tx_rend_escolas_2021.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 69251035 (66M) [application/zip]\n", + "Saving to: ‘tx_rend_escolas_2021.zip’\n", + "\n", + "tx_rend_escolas_202 100%[===================>] 66.04M 298KB/s in 4m 22s \n", + "\n", + "2022-09-14 09:34:22 (258 KB/s) - ‘tx_rend_escolas_2021.zip’ saved [69251035/69251035]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "rename_tx = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', \n", + " 'NO_DEPENDENCIA':'rede', '1_CAT_FUN':'taxa_aprovacao_ef', \n", + " '1_CAT_FUN_AI':'taxa_aprovacao_ef_anos_iniciais', '1_CAT_FUN_AF':'taxa_aprovacao_ef_anos_finais', \n", + " '1_CAT_FUN_01':'taxa_aprovacao_ef_1_ano', '1_CAT_FUN_02':'taxa_aprovacao_ef_2_ano', '1_CAT_FUN_03':'taxa_aprovacao_ef_3_ano',\n", + " '1_CAT_FUN_04':'taxa_aprovacao_ef_4_ano', '1_CAT_FUN_05':'taxa_aprovacao_ef_5_ano', '1_CAT_FUN_06':'taxa_aprovacao_ef_6_ano', \n", + " '1_CAT_FUN_07':'taxa_aprovacao_ef_7_ano', '1_CAT_FUN_08':'taxa_aprovacao_ef_8_ano', '1_CAT_FUN_09':'taxa_aprovacao_ef_9_ano',\n", + " '1_CAT_MED':'taxa_aprovacao_em', '1_CAT_MED_01':'taxa_aprovacao_em_1_ano', '1_CAT_MED_02':'taxa_aprovacao_em_2_ano',\n", + " '1_CAT_MED_03':'taxa_aprovacao_em_3_ano', '1_CAT_MED_04':'taxa_aprovacao_em_4_ano', '1_CAT_MED_NS':'taxa_aprovacao_em_nao_seriado',\n", + " '2_CAT_FUN':'taxa_reprovacao_ef', '2_CAT_FUN_AI':'taxa_reprovacao_ef_anos_iniciais', '2_CAT_FUN_AF':'taxa_reprovacao_ef_anos_finais',\n", + " '2_CAT_FUN_01':'taxa_reprovacao_ef_1_ano', '2_CAT_FUN_02':'taxa_reprovacao_ef_2_ano', '2_CAT_FUN_03':'taxa_reprovacao_ef_3_ano',\n", + " '2_CAT_FUN_04':'taxa_reprovacao_ef_4_ano', '2_CAT_FUN_05':'taxa_reprovacao_ef_5_ano', '2_CAT_FUN_06':'taxa_reprovacao_ef_6_ano',\n", + " '2_CAT_FUN_07':'taxa_reprovacao_ef_7_ano', '2_CAT_FUN_08':'taxa_reprovacao_ef_8_ano', '2_CAT_FUN_09':'taxa_reprovacao_ef_9_ano',\n", + " '2_CAT_MED':'taxa_reprovacao_em', '2_CAT_MED_01':'taxa_reprovacao_em_1_ano', 
'2_CAT_MED_02':'taxa_reprovacao_em_2_ano',\n", + " '2_CAT_MED_03':'taxa_reprovacao_em_3_ano', '2_CAT_MED_04':'taxa_reprovacao_em_4_ano', '2_CAT_MED_NS':'taxa_reprovacao_em_nao_seriado',\n", + " '3_CAT_FUN':'taxa_abandono_ef', '3_CAT_FUN_AI':'taxa_abandono_ef_anos_iniciais', '3_CAT_FUN_AF':'taxa_abandono_ef_anos_finais',\n", + " '3_CAT_FUN_01':'taxa_abandono_ef_1_ano', '3_CAT_FUN_02':'taxa_abandono_ef_2_ano', '3_CAT_FUN_03':'taxa_abandono_ef_3_ano', \n", + " '3_CAT_FUN_04':'taxa_abandono_ef_4_ano', '3_CAT_FUN_05':'taxa_abandono_ef_5_ano', '3_CAT_FUN_06':'taxa_abandono_ef_6_ano', \n", + " '3_CAT_FUN_07':'taxa_abandono_ef_7_ano', '3_CAT_FUN_08':'taxa_abandono_ef_8_ano', '3_CAT_FUN_09':'taxa_abandono_ef_9_ano',\n", + " '3_CAT_MED':'taxa_abandono_em', '3_CAT_MED_01':'taxa_abandono_em_1_ano', '3_CAT_MED_02':'taxa_abandono_em_2_ano', \n", + " '3_CAT_MED_03':'taxa_abandono_em_3_ano', '3_CAT_MED_04':'taxa_abandono_em_4_ano', '3_CAT_MED_NS':'taxa_abandono_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/tx_rend_escolas_2021.zip') as z:\n", + " with z.open('tx_rend_escolas_2021/tx_rend_escolas_2021.xlsx') as f:\n", + " tx = pd.read_excel(f, skiprows=8)\n", + " tx.drop([130129, 130130], inplace=True)\n", + " tx.drop(['NO_REGIAO', 'SG_UF', 'NO_MUNICIPIO', 'NO_ENTIDADE', 'Unnamed: 63', 'Unnamed: 64',\n", + " 'Unnamed: 65', 'Unnamed: 66', 'Unnamed: 67', 'Unnamed: 68',\n", + " 'Unnamed: 69', 'Unnamed: 70', 'Unnamed: 71', 'Unnamed: 72',\n", + " 'Unnamed: 73', 'Unnamed: 74', 'Unnamed: 75', 'Unnamed: 76',\n", + " 'Unnamed: 77', 'Unnamed: 78', 'Unnamed: 79', 'Unnamed: 80',\n", + " 'Unnamed: 81', 'Unnamed: 82', 'Unnamed: 83'], axis=1, inplace=True)\n", + " tx.rename(columns=rename_tx, inplace=True)\n", + " tx['id_municipio'] = tx['id_municipio'].astype(int)\n", + " tx['id_escola'] = tx['id_escola'].astype(int)\n", + " tx[index] = tx[index].astype(str)\n", + " tx['localizacao'] = tx['localizacao'].str.lower().replace('pública', 'publica')\n", + " tx['rede'] = 
tx['rede'].str.lower().replace('pública', 'publica')\n", + " tx = tx.replace('--', '')\n", + " df = pd.merge(df_escola, tx, how='left', left_on=index, right_on=index)\n", + " #df = df[ordem]\n", + " #df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "X9795OZ3VXoO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!wget --no-check-certificate https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_escolas_2021.zip" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "xxAHI_d7ILXh", + "outputId": "bc8d46b0-cc4a-417b-e91c-e4116dd53fc4" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", - " '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", - " '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", - " '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", - " '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", - " '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", - "\n", - "with ZipFile('/content/tnr_escolas_2021.zip') as z:\n", - " with z.open('tnr_escolas_2021/tnr_escolas_2021.xlsx') as f:\n", - " tnr = pd.read_excel(f, skiprows=8)\n", - " tnr.drop([130129, 130130, 130131], inplace=True)\n", - " tnr.drop(['NO_REGIAO', 'SG_UF', 'NO_MUNICIPIO', 'NO_ENTIDADE', 'Unnamed: 27'], axis=1, inplace=True)\n", - " tnr.rename(columns=rename_tnr, inplace=True)\n", - " tnr['id_municipio'] = 
tnr['id_municipio'].astype(int)\n", - " tnr['id_escola'] = tnr['id_escola'].astype(int)\n", - " tnr[index] = tnr[index].astype(str)\n", - " tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", - " tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", - " tnr = tnr.replace('--', '')\n", - " df = pd.merge(df, tnr, how='left', left_on=index, right_on=index)\n", - " df = df[ordem]\n", - " df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" - ], - "metadata": { - "id": "3q3jeS1gLSaA" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "--2022-09-14 09:39:16-- https://download.inep.gov.br/informacoes_estatisticas/indicadores_educacionais/2021/tnr_escolas_2021.zip\n", + "Resolving download.inep.gov.br (download.inep.gov.br)... 200.130.24.15\n", + "Connecting to download.inep.gov.br (download.inep.gov.br)|200.130.24.15|:443... connected.\n", + "WARNING: cannot verify download.inep.gov.br's certificate, issued by ‘CN=RNP ICPEdu OV SSL CA 2019,O=Rede Nacional de Ensino e Pesquisa - RNP,C=BR’:\n", + " Unable to locally verify the issuer's authority.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 32365800 (31M) [application/zip]\n", + "Saving to: ‘tnr_escolas_2021.zip’\n", + "\n", + "tnr_escolas_2021.zi 100%[===================>] 30.87M 251KB/s in 1m 49s \n", + "\n", + "2022-09-14 09:41:07 (290 KB/s) - ‘tnr_escolas_2021.zip’ saved [32365800/32365800]\n", + "\n" + ] } - ] -} \ No newline at end of file + ] + }, + { + "cell_type": "code", + "source": [ + "rename_tnr = {'NU_ANO_CENSO':'ano', 'CO_MUNICIPIO':'id_municipio', 'CO_ENTIDADE':'id_escola', 'NO_CATEGORIA':'localizacao', 'NO_DEPENDENCIA':'rede', \n", + " '4_CAT_FUN':'tnr_ef', '4_CAT_FUN_AI':'tnr_ef_anos_iniciais', '4_CAT_FUN_AF':'tnr_ef_anos_finais', \n", + " '4_CAT_FUN_01':'tnr_ef_1_ano', '4_CAT_FUN_02':'tnr_ef_2_ano', '4_CAT_FUN_03':'tnr_ef_3_ano', '4_CAT_FUN_04':'tnr_ef_4_ano', \n", + " '4_CAT_FUN_05':'tnr_ef_5_ano', '4_CAT_FUN_06':'tnr_ef_6_ano', '4_CAT_FUN_07':'tnr_ef_7_ano', '4_CAT_FUN_08':'tnr_ef_8_ano', \n", + " '4_CAT_FUN_09':'tnr_ef_9_ano', '4_CAT_MED':'tnr_em', '4_CAT_MED_01':'tnr_em_1_ano', '4_CAT_MED_02':'tnr_em_2_ano', \n", + " '4_CAT_MED_03':'tnr_em_3_ano', '4_CAT_MED_04':'tnr_em_4_ano', '4_CAT_MED_NS':'tnr_em_nao_seriado'}\n", + "\n", + "with ZipFile('/content/tnr_escolas_2021.zip') as z:\n", + " with z.open('tnr_escolas_2021/tnr_escolas_2021.xlsx') as f:\n", + " tnr = pd.read_excel(f, skiprows=8)\n", + " tnr.drop([130129, 130130, 130131], inplace=True)\n", + " tnr.drop(['NO_REGIAO', 'SG_UF', 'NO_MUNICIPIO', 'NO_ENTIDADE', 'Unnamed: 27'], axis=1, inplace=True)\n", + " tnr.rename(columns=rename_tnr, inplace=True)\n", + " tnr['id_municipio'] = tnr['id_municipio'].astype(int)\n", + " tnr['id_escola'] = tnr['id_escola'].astype(int)\n", + " tnr[index] = tnr[index].astype(str)\n", + " tnr['localizacao'] = tnr['localizacao'].str.lower().replace('pública', 'publica')\n", + " tnr['rede'] = tnr['rede'].str.lower().replace('pública', 'publica')\n", + " tnr = tnr.replace('--', '')\n", + " df = pd.merge(df, tnr, how='left', left_on=index, right_on=index)\n", + " df = 
df[ordem]\n", + " df.to_csv('/content/escola.csv', index=False, encoding='utf-8', na_rep='')" + ], + "metadata": { + "id": "3q3jeS1gLSaA" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/bases/test_dataset/test_table/table_description.txt b/bases/test_dataset/test_table/table_description.txt index 7500878b0..e9390a63a 100644 --- a/bases/test_dataset/test_table/table_description.txt +++ b/bases/test_dataset/test_table/table_description.txt @@ -1,8 +1,4 @@ -<<<<<<< HEAD this is a test-dataset -======= -None ->>>>>>> fee7177eb7c1b2efc60334b30538bfea04eb2af9 Para saber mais acesse: Website: diff --git a/poetry.lock b/poetry.lock deleted file mode 100644 index fbcbbcc8a..000000000 --- a/poetry.lock +++ /dev/null @@ -1,667 +0,0 @@ -[[package]] -name = "astunparse" -version = "1.6.3" -description = "An AST unparser for Python" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -six = ">=1.6.1,<2.0" -wheel = ">=0.23.0,<1.0" - -[[package]] -name = "cached-property" -version = "1.5.2" -description = "A decorator for caching properties in classes." -category = "main" -optional = false -python-versions = "*" - -[[package]] -name = "certifi" -version = "2022.9.24" -description = "Python package for providing Mozilla's CA Bundle." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "charset-normalizer" -version = "2.1.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-category = "main" -optional = false -python-versions = ">=3.6.0" - -[package.extras] -unicode_backport = ["unicodedata2"] - -[[package]] -name = "click" -version = "8.1.3" -description = "Composable command line interface toolkit" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" - -[[package]] -name = "ghp-import" -version = "2.1.0" -description = "Copy your docs directly to the gh-pages branch." -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -python-dateutil = ">=2.8.1" - -[package.extras] -dev = ["flake8", "markdown", "twine", "wheel"] - -[[package]] -name = "idna" -version = "3.4" -description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" -optional = false -python-versions = ">=3.5" - -[[package]] -name = "importlib-metadata" -version = "5.1.0" -description = "Read metadata from Python packages" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} -zipp = ">=0.5" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] -perf = ["ipython"] -testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] - -[[package]] -name = "Jinja2" -version = "3.1.2" -description = "A very fast and 
expressive template engine." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "Markdown" -version = "3.3.7" -description = "Python implementation of Markdown." -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} - -[package.extras] -testing = ["coverage", "pyyaml"] - -[[package]] -name = "MarkupSafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mergedeep" -version = "1.3.4" -description = "A deep merge function for 🐍." -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "mkdocs" -version = "1.4.2" -description = "Project documentation with Markdown." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -click = ">=7.0" -colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} -ghp-import = ">=1.0" -importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} -jinja2 = ">=2.11.1" -markdown = ">=3.2.1,<3.4" -mergedeep = ">=1.3.4" -packaging = ">=20.5" -pyyaml = ">=5.1" -pyyaml-env-tag = ">=0.1" -typing-extensions = {version = ">=3.10", markers = "python_version < \"3.8\""} -watchdog = ">=2.0" - -[package.extras] -i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] - -[[package]] -name = "mkdocs-autorefs" -version = "0.4.1" -description = "Automatically link across pages in MkDocs." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -Markdown = ">=3.3" -mkdocs = ">=1.1" - -[[package]] -name = "mkdocs-click" -version = "0.5.0" -description = "An MkDocs extension to generate documentation for Click command line applications" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -click = ">=7,<9" -markdown = ">=3.0.0,<4.0.0" - -[[package]] -name = "mkdocs-material" -version = "8.5.10" -description = "Documentation that simply works" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -jinja2 = ">=3.0.2" -markdown = ">=3.2" -mkdocs = ">=1.4.0" -mkdocs-material-extensions = ">=1.1" -pygments = ">=2.12" -pymdown-extensions = ">=9.4" -requests = ">=2.26" - -[[package]] -name = "mkdocs-material-extensions" -version = "1.1.1" -description = "Extension pack for Python Markdown and MkDocs Material." -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "mkdocstrings" -version = "0.18.1" -description = "Automatic documentation from sources, for MkDocs." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -Jinja2 = ">=2.11.1" -Markdown = ">=3.3" -MarkupSafe = ">=1.1" -mkdocs = ">=1.2" -mkdocs-autorefs = ">=0.3.1" -mkdocstrings-python-legacy = ">=0.2" -pymdown-extensions = ">=6.3" - -[package.extras] -crystal = ["mkdocstrings-crystal (>=0.3.4)"] -python = ["mkdocstrings-python (>=0.5.2)"] -python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] - -[[package]] -name = "mkdocstrings-python-legacy" -version = "0.2.2" -description = "A legacy Python handler for mkdocstrings." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -mkdocstrings = ">=0.18" -pytkdocs = ">=0.14" - -[[package]] -name = "packaging" -version = "21.3" -description = "Core utilities for Python packages" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" - -[[package]] -name = "Pygments" -version = "2.13.0" -description = "Pygments is a syntax highlighting package written in Python." -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -plugins = ["importlib-metadata"] - -[[package]] -name = "pymdown-extensions" -version = "9.9" -description = "Extension pack for Python Markdown." -category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -markdown = ">=3.2" - -[[package]] -name = "pyparsing" -version = "3.0.9" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" -optional = false -python-versions = ">=3.6.8" - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -category = "main" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytkdocs" -version = "0.16.1" -description = "Load Python objects documentation." 
-category = "main" -optional = false -python-versions = ">=3.7" - -[package.dependencies] -astunparse = {version = ">=1.6", markers = "python_version < \"3.9\""} -cached-property = {version = ">=1.5", markers = "python_version < \"3.8\""} -typing-extensions = {version = ">=3.7", markers = "python_version < \"3.8\""} - -[package.extras] -numpy-style = ["docstring_parser (>=0.7)"] - -[[package]] -name = "PyYAML" -version = "6.0" -description = "YAML parser and emitter for Python" -category = "main" -optional = false -python-versions = ">=3.6" - -[[package]] -name = "pyyaml_env_tag" -version = "0.1" -description = "A custom YAML tag for referencing environment variables in YAML files. " -category = "main" -optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyyaml = "*" - -[[package]] -name = "requests" -version = "2.28.1" -description = "Python HTTP for Humans." -category = "main" -optional = false -python-versions = ">=3.7, <4" - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<3" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" - -[[package]] -name = "typing-extensions" -version = "4.4.0" -description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" -optional = false -python-versions = ">=3.7" - -[[package]] -name = "urllib3" -version = "1.26.13" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "watchdog" -version = "2.1.9" -description = "Filesystem events monitoring" -category = "main" -optional = false -python-versions = ">=3.6" - -[package.extras] -watchmedo = ["PyYAML (>=3.10)"] - -[[package]] -name = "wheel" -version = "0.38.4" -description = "A built-package format for Python" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -test = ["pytest (>=3.0.0)"] - -[[package]] -name = "zipp" -version = "3.11.0" -description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" -optional = false -python-versions = ">=3.7" - -[package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] -testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] - -[metadata] -lock-version = "1.1" -python-versions = "^3.7" -content-hash = "d13e0e0a8a10bdda935c620df72d341d3ecf9698146a38f1e54ea2e59ba2571d" - -[metadata.files] -astunparse = [ - {file = "astunparse-1.6.3-py2.py3-none-any.whl", hash = "sha256:c2652417f2c8b5bb325c885ae329bdf3f86424075c4fd1a128674bc6fba4b8e8"}, - {file = "astunparse-1.6.3.tar.gz", hash = "sha256:5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872"}, -] -cached-property = [ - {file = "cached-property-1.5.2.tar.gz", hash = "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130"}, - {file = 
"cached_property-1.5.2-py2.py3-none-any.whl", hash = "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"}, -] -certifi = [ - {file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"}, - {file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"}, -] -charset-normalizer = [ - {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, - {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, -] -click = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] -colorama = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] -ghp-import = [ - {file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"}, - {file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"}, -] -idna = [ - {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, - {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, -] -importlib-metadata = [ - {file = "importlib_metadata-5.1.0-py3-none-any.whl", hash = "sha256:d84d17e21670ec07990e1044a99efe8d615d860fd176fc29ef5c306068fda313"}, - {file = "importlib_metadata-5.1.0.tar.gz", hash = 
"sha256:d5059f9f1e8e41f80e9c56c2ee58811450c31984dfa625329ffd7c0dad88a73b"}, -] -Jinja2 = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] -Markdown = [ - {file = "Markdown-3.3.7-py3-none-any.whl", hash = "sha256:f5da449a6e1c989a4cea2631aa8ee67caa5a2ef855d551c88f9e309f4634c621"}, - {file = "Markdown-3.3.7.tar.gz", hash = "sha256:cbb516f16218e643d8e0a95b309f77eb118cb138d39a4f27851e6a63581db874"}, -] -MarkupSafe = [ - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = 
"sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, - {file = 
"MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, - {file = 
"MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, - {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, -] -mergedeep = [ - {file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"}, - {file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"}, -] -mkdocs = [ - {file = "mkdocs-1.4.2-py3-none-any.whl", hash = "sha256:c8856a832c1e56702577023cd64cc5f84948280c1c0fcc6af4cd39006ea6aa8c"}, - {file = "mkdocs-1.4.2.tar.gz", hash = "sha256:8947af423a6d0facf41ea1195b8e1e8c85ad94ac95ae307fe11232e0424b11c5"}, -] -mkdocs-autorefs = [ - {file = "mkdocs-autorefs-0.4.1.tar.gz", hash = "sha256:70748a7bd025f9ecd6d6feeba8ba63f8e891a1af55f48e366d6d6e78493aba84"}, - {file = "mkdocs_autorefs-0.4.1-py3-none-any.whl", hash = "sha256:a2248a9501b29dc0cc8ba4c09f4f47ff121945f6ce33d760f145d6f89d313f5b"}, -] -mkdocs-click = [ - {file = "mkdocs_click-0.5.0-py3-none-any.whl", hash = "sha256:ded9c09e3a7966fba8b147bbe2a3ef0b31a21072340b8c08cd8c885da57d0746"}, - {file = 
"mkdocs_click-0.5.0.tar.gz", hash = "sha256:a506983881da5e9405fdecc03351806661f433e1c2f81092fb05c9b93998c11a"}, -] -mkdocs-material = [ - {file = "mkdocs_material-8.5.10-py3-none-any.whl", hash = "sha256:51760fa4c9ee3ca0b3a661ec9f9817ec312961bb84ff19e5b523fdc5256e1d6c"}, - {file = "mkdocs_material-8.5.10.tar.gz", hash = "sha256:7623608f746c6d9ff68a8ef01f13eddf32fa2cae5e15badb251f26d1196bc8f1"}, -] -mkdocs-material-extensions = [ - {file = "mkdocs_material_extensions-1.1.1-py3-none-any.whl", hash = "sha256:e41d9f38e4798b6617ad98ca8f7f1157b1e4385ac1459ca1e4ea219b556df945"}, - {file = "mkdocs_material_extensions-1.1.1.tar.gz", hash = "sha256:9c003da71e2cc2493d910237448c672e00cefc800d3d6ae93d2fc69979e3bd93"}, -] -mkdocstrings = [ - {file = "mkdocstrings-0.18.1-py3-none-any.whl", hash = "sha256:4053929356df8cd69ed32eef71d8f676a472ef72980c9ffd4f933ead1debcdad"}, - {file = "mkdocstrings-0.18.1.tar.gz", hash = "sha256:fb7c91ce7e3ab70488d3fa6c073a4f827cdc319042f682ef8ea95459790d64fc"}, -] -mkdocstrings-python-legacy = [ - {file = "mkdocstrings-python-legacy-0.2.2.tar.gz", hash = "sha256:f0e7ec6a19750581b752acb38f6b32fcd1efe006f14f6703125d2c2c9a5c6f02"}, - {file = "mkdocstrings_python_legacy-0.2.2-py3-none-any.whl", hash = "sha256:379107a3a5b8db9b462efc4493c122efe21e825e3702425dbd404621302a563a"}, -] -packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, -] -Pygments = [ - {file = "Pygments-2.13.0-py3-none-any.whl", hash = "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42"}, - {file = "Pygments-2.13.0.tar.gz", hash = "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1"}, -] -pymdown-extensions = [ - {file = "pymdown_extensions-9.9-py3-none-any.whl", hash = 
"sha256:ac698c15265680db5eb13cd4342abfcde2079ac01e5486028f47a1b41547b859"}, - {file = "pymdown_extensions-9.9.tar.gz", hash = "sha256:0f8fb7b74a37a61cc34e90b2c91865458b713ec774894ffad64353a5fce85cfc"}, -] -pyparsing = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, -] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] -pytkdocs = [ - {file = "pytkdocs-0.16.1-py3-none-any.whl", hash = "sha256:a8c3f46ecef0b92864cc598e9101e9c4cf832ebbf228f50c84aa5dd850aac379"}, - {file = "pytkdocs-0.16.1.tar.gz", hash = "sha256:e2ccf6dfe9dbbceb09818673f040f1a7c32ed0bffb2d709b06be6453c4026045"}, -] -PyYAML = [ - {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, - {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, - {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, - {file = 
"PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, - {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, - {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, - {file = 
"PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, - {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, - {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, - {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, - {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, - {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, - {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, - {file = 
"PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, - {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, - {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, - {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, -] -pyyaml_env_tag = [ - {file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"}, - {file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"}, -] -requests = [ - {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, - {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, -] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -typing-extensions = [ - 
{file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, - {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, -] -urllib3 = [ - {file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"}, - {file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"}, -] -watchdog = [ - {file = "watchdog-2.1.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a735a990a1095f75ca4f36ea2ef2752c99e6ee997c46b0de507ba40a09bf7330"}, - {file = "watchdog-2.1.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b17d302850c8d412784d9246cfe8d7e3af6bcd45f958abb2d08a6f8bedf695d"}, - {file = "watchdog-2.1.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ee3e38a6cc050a8830089f79cbec8a3878ec2fe5160cdb2dc8ccb6def8552658"}, - {file = "watchdog-2.1.9-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:64a27aed691408a6abd83394b38503e8176f69031ca25d64131d8d640a307591"}, - {file = "watchdog-2.1.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:195fc70c6e41237362ba720e9aaf394f8178bfc7fa68207f112d108edef1af33"}, - {file = "watchdog-2.1.9-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bfc4d351e6348d6ec51df007432e6fe80adb53fd41183716017026af03427846"}, - {file = "watchdog-2.1.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8250546a98388cbc00c3ee3cc5cf96799b5a595270dfcfa855491a64b86ef8c3"}, - {file = "watchdog-2.1.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:117ffc6ec261639a0209a3252546b12800670d4bf5f84fbd355957a0595fe654"}, - {file = "watchdog-2.1.9-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:97f9752208f5154e9e7b76acc8c4f5a58801b338de2af14e7e181ee3b28a5d39"}, - {file = "watchdog-2.1.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:247dcf1df956daa24828bfea5a138d0e7a7c98b1a47cf1fa5b0c3c16241fcbb7"}, - {file = "watchdog-2.1.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:226b3c6c468ce72051a4c15a4cc2ef317c32590d82ba0b330403cafd98a62cfd"}, - {file = "watchdog-2.1.9-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d9820fe47c20c13e3c9dd544d3706a2a26c02b2b43c993b62fcd8011bcc0adb3"}, - {file = "watchdog-2.1.9-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:70af927aa1613ded6a68089a9262a009fbdf819f46d09c1a908d4b36e1ba2b2d"}, - {file = "watchdog-2.1.9-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed80a1628cee19f5cfc6bb74e173f1b4189eb532e705e2a13e3250312a62e0c9"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9f05a5f7c12452f6a27203f76779ae3f46fa30f1dd833037ea8cbc2887c60213"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_armv7l.whl", hash = "sha256:255bb5758f7e89b1a13c05a5bceccec2219f8995a3a4c4d6968fe1de6a3b2892"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_i686.whl", hash = "sha256:d3dda00aca282b26194bdd0adec21e4c21e916956d972369359ba63ade616153"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_ppc64.whl", hash = "sha256:186f6c55abc5e03872ae14c2f294a153ec7292f807af99f57611acc8caa75306"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:083171652584e1b8829581f965b9b7723ca5f9a2cd7e20271edf264cfd7c1412"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_s390x.whl", hash = "sha256:b530ae007a5f5d50b7fbba96634c7ee21abec70dc3e7f0233339c81943848dc1"}, - {file = "watchdog-2.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:4f4e1c4aa54fb86316a62a87b3378c025e228178d55481d30d857c6c438897d6"}, - {file = "watchdog-2.1.9-py3-none-win32.whl", hash = "sha256:5952135968519e2447a01875a6f5fc8c03190b24d14ee52b0f4b1682259520b1"}, - {file = "watchdog-2.1.9-py3-none-win_amd64.whl", hash = "sha256:7a833211f49143c3d336729b0020ffd1274078e94b0ae42e22f596999f50279c"}, - {file = "watchdog-2.1.9-py3-none-win_ia64.whl", 
hash = "sha256:ad576a565260d8f99d97f2e64b0f97a48228317095908568a9d5c786c829d428"}, - {file = "watchdog-2.1.9.tar.gz", hash = "sha256:43ce20ebb36a51f21fa376f76d1d4692452b2527ccd601950d69ed36b9e21609"}, -] -wheel = [ - {file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"}, - {file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"}, -] -zipp = [ - {file = "zipp-3.11.0-py3-none-any.whl", hash = "sha256:83a28fcb75844b5c0cdaf5aa4003c2d728c77e05f5aeabe8e95e56727005fbaa"}, - {file = "zipp-3.11.0.tar.gz", hash = "sha256:a7a22e05929290a67401440b39690ae6563279bced5f314609d9d03798f56766"}, -] diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 69da85900..000000000 --- a/pyproject.toml +++ /dev/null @@ -1,18 +0,0 @@ -[tool.poetry] -name = "mais" -version = "1.6.11" -description = "Dependências do BD+, retiradas do pacote python" -authors = ["Mauricio Fagundes "] -readme = "README.md" - -[tool.poetry.dependencies] -python = "^3.7" -mkdocs = "^1.2.3" -mkdocs-click = "^0.5.0" -mkdocs-material = "^8.2.7" -mkdocstrings = "^0.18.1" - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/python-package/.flake8 b/python-package/.flake8 index 409ceb60a..46e07c1a1 100644 --- a/python-package/.flake8 +++ b/python-package/.flake8 @@ -1,3 +1,4 @@ - [flake8] - select = C,E,F,W,B,B950 - extend-ignore = E501 \ No newline at end of file +[flake8] + select = C,E,F,W,B,B950 + extend-ignore = E501 + max-line-length = 88 \ No newline at end of file diff --git a/python-package/README.md b/python-package/README.md index 727b25123..269b94d5a 100644 --- a/python-package/README.md +++ b/python-package/README.md @@ -1,6 +1,6 @@ # Python Package -## Desenvolvimento Linux e Mac: +## Desenvolvimento Linux e Mac: Clone o repositório principal: @@ -20,7 +20,7 @@ python setup.py develop 1. 
Abra uma branch com o nome issue- 2. Faça as modificações necessárias -3. Suba o Pull Request apontando para a branch `python-next-minor` ou `python-next-patch`. +3. Suba o Pull Request apontando para a branch `python-next-minor` ou `python-next-patch`. Sendo, minor e patch referentes ao bump da versão: v1.5.7 --> v\.\.\. 4. O nome do PR deve seguir o padrão `[infra] ` @@ -28,7 +28,7 @@ python setup.py develop ### O que uma modificação precisa ter - + - Resolver o problema - Lista de modificações efetuadas 1. Mudei a função X para fazer Y @@ -36,7 +36,7 @@ python setup.py develop - Referência aos issues atendidos - Documentação e Docstrings - Testes - + ## Versionamento @@ -47,7 +47,7 @@ python setup.py develop ```bash git pull origin [python-version] ``` - + Onde `[python-version]` é a branch da nova versão do pacote. 2. Se necessario adicionar novas dependências: @@ -55,7 +55,7 @@ python setup.py develop poetry add ``` -3. Gerar novo `requirements-dev.txt` +3. Gerar novo `requirements-dev.txt` ```bash poetry export -f requirements.txt --output requirements-dev.txt --without-hashes @@ -77,17 +77,17 @@ python setup.py develop repository = "https://github.com/base-dos-dados/bases" version = "1.6.1-beta.2" ``` - + O campo `version` deve ser alterado para o número da versão sendo lançada. 5. Editar `basedosdados/__init__.py`: - + O arquivo `basedosdados/__init__.py` contém a versão do pacote em python da **BD**. Exemplo: - + ```python __version__ = "1.6.1-beta.2" ``` - + O atributo `__version__` também deve ser alterado para o número da versão sendo lançada. 6. Push para branch: @@ -99,7 +99,9 @@ python setup.py develop 7. 
Publicação do pacote no PyPI (exige usuário e senha): Para publicar o pacote no PyPI, use: - ```bash + Para publicar o pacote no PyPI, use: + + ```bash poetry version [python-version] poetry publish --build ``` diff --git a/python-package/basedosdados/__init__.py b/python-package/basedosdados/__init__.py index f63598420..c9520a252 100644 --- a/python-package/basedosdados/__init__.py +++ b/python-package/basedosdados/__init__.py @@ -1,34 +1,33 @@ -''' +""" Importing the module will automatically import the submodules. -''' - - -import sys +""" +# flake8: noqa import os +import sys + +from basedosdados._version import __version__ +from basedosdados._warnings import show_warnings -__version__ = "1.6.11" +show_warnings() sys.path.append(f"{os.getcwd()}/python-package") -#pylint: disable=C0413 +# pylint: disable=C0413 -from basedosdados.constants import constants, config -from basedosdados.upload.dataset import Dataset -from basedosdados.upload.storage import Storage -from basedosdados.upload.table import Table -from basedosdados.upload.metadata import Metadata +from basedosdados.backend import Backend +from basedosdados.constants import config, constants from basedosdados.download.base import reauth -from basedosdados.download.download import ( - read_sql, - download, - read_table, -) +from basedosdados.download.download import download, read_sql, read_table from basedosdados.download.metadata import ( - list_datasets, - list_dataset_tables, - get_table_description, get_dataset_description, get_table_columns, + get_table_description, get_table_size, + list_dataset_tables, + list_datasets, search, ) +from basedosdados.upload.connection import Connection +from basedosdados.upload.dataset import Dataset +from basedosdados.upload.storage import Storage +from basedosdados.upload.table import Table diff --git a/python-package/basedosdados/__main__.py b/python-package/basedosdados/__main__.py index 986556e07..75a22e18e 100644 --- a/python-package/basedosdados/__main__.py +++ 
b/python-package/basedosdados/__main__.py @@ -1,8 +1,8 @@ -''' +""" Import CLI implementation for the application. -''' +""" -#pylint: disable=E1120 +# pylint: disable=E1120 from basedosdados.cli.cli import cli diff --git a/python-package/basedosdados/_version.py b/python-package/basedosdados/_version.py new file mode 100644 index 000000000..360e59b9e --- /dev/null +++ b/python-package/basedosdados/_version.py @@ -0,0 +1,3 @@ +import importlib.metadata + +__version__ = importlib.metadata.version("basedosdados") diff --git a/python-package/basedosdados/_warnings.py b/python-package/basedosdados/_warnings.py new file mode 100644 index 000000000..2d68d9688 --- /dev/null +++ b/python-package/basedosdados/_warnings.py @@ -0,0 +1,124 @@ +""" +Checks for warnings and general-purpose messages and displays them to the user. +""" + +from typing import List, Tuple + +import requests +from loguru import logger + +from basedosdados._version import __version__ + + +def get_latest_version_number(): + """Get the latest version number from PyPI.""" + try: + response = requests.get( + "https://pypi.python.org/pypi/basedosdados/json", timeout=5 + ) + return response.json()["info"]["version"] + except: # noqa + logger.warning( + "Could not check for updates. Please check your internet connection." + ) + return None + + +def compare_version_numbers(versionA: str, versionB: str) -> Tuple[int, str, str]: + """ + Compares two version numbers and returns the difference between them. + + Args: + versionA (str): The first version number. + versionB (str): The second version number. 
+ + Returns: + 1 if versionA > versionB + 0 if versionA == versionB + -1 if versionA < versionB + """ + + def parse_version(version: str): + version: List[str] = version.split(".") + if len(version) == 1: + version.append(0) + version.append(0) + elif len(version) == 2: + version.append(0) + elif len(version) > 3: + version = version[:3] + version[0] = int(version[0]) + version[1] = int(version[1]) + patch = "" + for char in version[2]: + if char.isdigit(): + patch += char + else: + break + version_type = "" + if "b" in version[2]: + version_type = "BETA" + elif "a" in version[2]: + version_type = "ALPHA" + else: + version_type = "GA" + version[2] = int(patch) + version.append(version_type) + return version + + versionA = parse_version(versionA) + versionB = parse_version(versionB) + + # Compare types. If both types are the same, compare the numbers. + if versionA[3] == versionB[3]: + for i in range(3): + if versionA[i] > versionB[i]: + return 1, versionA[3], versionB[3] + elif versionA[i] < versionB[i]: + return -1, versionA[3], versionB[3] + return 0, versionA[3], versionB[3] + # If the types are different, don't compare the numbers. + return 0, versionA[3], versionB[3] + + +def show_warnings(): + """Show warnings and general-purpose messages to the user.""" + # Version warning + try: + latest_version = get_latest_version_number() + if latest_version is not None: + comparison = compare_version_numbers(__version__, latest_version) + if comparison[0] == -1: + logger.warning( + f"You are using an outdated version of basedosdados ({__version__}). " + f"Please upgrade to the latest version ({latest_version}) using " + "'pip install --upgrade basedosdados'." + ) + except: # noqa + logger.warning( + "Could not check for updates. Please check your internet connection." 
+ ) + # General-purpose warnings and messages + try: + response = requests.get( + "https://basedosdados.github.io/notifications/data.json" + ) + data = response.json() + if "general" in data: + if "messages" in data["general"]: + for message in data["general"]["messages"]: + logger.info(message) + if "warnings" in data["general"]: + for warning in data["general"]["warnings"]: + logger.warning(warning) + if "python" in data: + if "messages" in data["python"]: + for message in data["python"]["messages"]: + logger.info(message) + if "warnings" in data["python"]: + for warning in data["python"]["warnings"]: + logger.warning(warning) + except: # noqa + logger.warning( + "Could not check for warnings and messages. Please check your internet connection." + ) diff --git a/python-package/basedosdados/backend/__init__.py b/python-package/basedosdados/backend/__init__.py new file mode 100644 index 000000000..4e862ed21 --- /dev/null +++ b/python-package/basedosdados/backend/__init__.py @@ -0,0 +1,298 @@ +""" +Module for interacting with the backend. +""" +from typing import Any, Dict + +from loguru import logger + +try: + from gql import Client, gql + from gql.transport.requests import RequestsHTTPTransport + + _backend_dependencies = True +except ImportError: + _backend_dependencies = False + +from basedosdados.exceptions import BaseDosDadosMissingDependencyException + + +class Backend: + def __init__(self, graphql_url: str): + """ + Backend class for interacting with the backend. + + Args: + graphql_url (str): URL of the GraphQL endpoint. + """ + self._graphql_url: str = graphql_url + + @property + def graphql_url(self) -> str: + """ + GraphQL endpoint URL. + """ + return self._graphql_url + + def _get_client( + self, headers: Dict[str, str] = None, fetch_schema_from_transport: bool = False + ) -> "Client": + """ + Get a GraphQL client. + + Args: + headers (Dict[str, str], optional): Headers to be passed to the client. Defaults to + None. 
+ fetch_schema_from_transport (bool, optional): Whether to fetch the schema from the + transport. Defaults to False. + + Returns: + Client: GraphQL client. + """ + if not _backend_dependencies: + raise BaseDosDadosMissingDependencyException( + "Optional dependencies for backend interaction are not installed. " + 'Please install basedosdados with the "upload" extra, such as:' + "\n\npip install basedosdados[upload]" + ) + transport = RequestsHTTPTransport( + url=self.graphql_url, headers=headers, use_json=True + ) + return Client( + transport=transport, fetch_schema_from_transport=fetch_schema_from_transport + ) + + def _execute_query( + self, + query: str, + variables: Dict[str, str] = None, + client: "Client" = None, + headers: Dict[str, str] = None, + fetch_schema_from_transport: bool = False, + ) -> Dict[str, Any]: + """ + Execute a GraphQL query. + + Args: + query (str): GraphQL query. + variables (Dict[str, str], optional): Variables to be passed to the query. Defaults + to None. + client (Client, optional): GraphQL client. Defaults to None. + headers (Dict[str, str], optional): Headers to be passed to the client. Defaults to + None. + fetch_schema_from_transport (bool, optional): Whether to fetch the schema from the + transport. Defaults to False. + + Returns: + Dict: GraphQL response. + """ + if not _backend_dependencies: + raise BaseDosDadosMissingDependencyException( + "Optional dependencies for backend interaction are not installed. " + 'Please install basedosdados with the "upload" extra, such as:' + "\n\npip install basedosdados[upload]" + ) + if not client: + client = self._get_client( + headers=headers, fetch_schema_from_transport=fetch_schema_from_transport + ) + try: + return client.execute(gql(query), variable_values=variables) + except Exception as e: + msg = f"The API URL in the config.toml file may be incorrect or the API might be temporarily unavailable!\nError executing query: {e}." 
+ logger.error(msg) + return None + + def _get_dataset_id_from_name(self, gcp_dataset_id): + query = """ + query ($gcp_dataset_id: String!){ + allCloudtable(gcpDatasetId: $gcp_dataset_id) { + edges { + node { + table { + dataset { + _id + } + } + } + } + } + } + """ + + variables = {"gcp_dataset_id": gcp_dataset_id} + response = self._execute_query(query=query, variables=variables) + r = {} if response is None else self._simplify_graphql_response(response) + if r.get("allCloudtable", []) != []: + return r.get("allCloudtable")[0].get("table").get("dataset").get("_id") + msg = f"{gcp_dataset_id} not found. Please create the metadata first in {self.graphql_url}" + logger.info(msg) + return None + + def _get_table_id_from_name(self, gcp_dataset_id, gcp_table_id): + query = """ + query ($gcp_dataset_id: String!, $gcp_table_id: String!){ + allCloudtable(gcpDatasetId: $gcp_dataset_id, gcpTableId: $gcp_table_id) { + edges { + node { + table { + _id + } + } + } + } + } + """ + + if gcp_dataset_id: + variables = { + "gcp_dataset_id": gcp_dataset_id, + "gcp_table_id": gcp_table_id, + } + + response = self._execute_query(query=query, variables=variables) + r = {} if response is None else self._simplify_graphql_response(response) + if r.get("allCloudtable", []) != []: + return r.get("allCloudtable")[0].get("table").get("_id") + msg = f"No table {gcp_table_id} found in {gcp_dataset_id}. Please create in {self.graphql_url}" + logger.info(msg) + return None + + def get_dataset_config(self, dataset_id: str) -> Dict[str, Any]: + """ + Get dataset configuration. + + Args: + dataset_id (str): The ID for the dataset. + + Returns: + Dict: Dataset configuration. 
+ """ + query = """ + query ($dataset_id: ID!){ + allDataset(id: $dataset_id) { + edges { + node { + slug + name + descriptionPt + createdAt + updatedAt + themes { + edges { + node { + namePt + } + } + } + tags { + edges { + node { + namePt + } + } + } + organization { + namePt + } + } + } + } + } + + """ + dataset_id = self._get_dataset_id_from_name(dataset_id) + if dataset_id: + variables = {"dataset_id": dataset_id} + response = self._execute_query(query=query, variables=variables) + return self._simplify_graphql_response(response).get("allDataset")[0] + else: + return {} + + def get_table_config(self, dataset_id: str, table_id: str) -> Dict[str, Any]: + """ + Get table configuration. + + Args: + dataset_id (str): The ID for the dataset. + table_id (str): The ID for the table. + + Returns: + Dict: Table configuration. + """ + + query = """ + query ($table_id: ID!){ + allTable(id: $table_id) { + edges { + node { + slug + dataset { + slug + organization { + slug + } + } + namePt + descriptionPt + columns { + edges { + node { + name + isInStaging + isPartition + descriptionPt + observations + bigqueryType { + name + } + } + } + } + } + } + } + } + """ + table_id = self._get_table_id_from_name( + gcp_dataset_id=dataset_id, gcp_table_id=table_id + ) + + if table_id: + variables = {"table_id": table_id} + response = self._execute_query(query=query, variables=variables) + return self._simplify_graphql_response(response).get("allTable")[0] + else: + return {} + + def _simplify_graphql_response(self, response: dict) -> dict: + """ + Simplify the graphql response + Args: + response: the graphql response + Returns: + dict: the simplified graphql response + """ + if response == {}: # pragma: no cover + return {} + + output_ = {} + + for key in response: + try: + if ( + isinstance(response[key], dict) + and response[key].get("edges") is not None # noqa + ): + output_[key] = [ + v.get("node") + for v in list( + map(self._simplify_graphql_response, response[key]["edges"]) + ) 
+ ] + elif isinstance(response[key], dict): + output_[key] = self._simplify_graphql_response(response[key]) + else: + output_[key] = response[key] + except TypeError as e: + logger.error(f"Erro({e}): {key} - {response[key]}") + return output_ diff --git a/python-package/basedosdados/cli/cli.py b/python-package/basedosdados/cli/cli.py index a22468846..3c95e837a 100644 --- a/python-package/basedosdados/cli/cli.py +++ b/python-package/basedosdados/cli/cli.py @@ -4,705 +4,36 @@ # pylint: disable=locally-disabled, multiple-statements, fixme, line-too-long, too-many-arguments, invalid-name, too-many-lines, protected-access, unused-argument, no-value-for-parameter, redefined-builtin -import click -from basedosdados.upload.base import Base -from basedosdados.upload.dataset import Dataset -from basedosdados.upload.table import Table -from basedosdados.upload.storage import Storage -from basedosdados.upload.metadata import Metadata +try: + import click + + _cli_dependencies = True +except ImportError: + _cli_dependencies = False import basedosdados as bd -from basedosdados.exceptions import BaseDosDadosException -from ckanapi import CKANAPIError +from basedosdados.exceptions import BaseDosDadosMissingDependencyException +from basedosdados.upload.base import Base + +if not _cli_dependencies: + raise BaseDosDadosMissingDependencyException( + "Optional dependencies for the CLI are not installed. " + 'Please install basedosdados with the "cli" extra, such as:' + "\n\npip install basedosdados[cli]" + ) @click.group() -@click.option("--templates", default=None, help="Templates path") @click.option("--bucket_name", default=None, help="Project bucket name") -@click.option("--metadata_path", default=None, help="Folder to store metadata") @click.version_option(package_name="basedosdados") @click.pass_context -def cli(ctx, templates, bucket_name, metadata_path): +def cli(ctx, bucket_name): """ Function to define the CLI. 
""" ctx.obj = dict( - templates=templates, bucket_name=bucket_name, - metadata_path=metadata_path, - ) - - -@click.group(name="dataset") -@click.pass_context -def cli_dataset(ctx): - """ - Command to manage datasets. - """ - - -@cli_dataset.command(name="init", help="Initialize metadata files of dataset") -@click.argument("dataset_id") -@click.option( - "--replace", - is_flag=True, - help="Whether to replace current metadata files", -) -@click.pass_context -def init_dataset(ctx, dataset_id, replace): - """ - Initialize metadata files of dataset. - """ - - d = Dataset(dataset_id=dataset_id, **ctx.obj).init(replace=replace) - - click.echo( - click.style( - f"Dataset `{dataset_id}` folder and metadata were created at {d.metadata_path}", - fg="green", - ) - ) - - -def mode_text(mode, verb, obj_id): - """ - Returns a specific text for each mode. - """ - - if mode == "all": - text = f"Datasets `{obj_id}` and `{obj_id}_staging` were {verb} in BigQuery" - elif mode == "staging": - text = f"Dataset `{obj_id}_stating` was {verb} in BigQuery" - elif mode == "prod": - text = f"Dataset `{obj_id}` was {verb} in BigQuery" - - return text - - -@cli_dataset.command(name="create", help="Create dataset on BigQuery") -@click.argument("dataset_id") -@click.option( - "--mode", "-m", default="all", help="What datasets to create [prod|staging|all]" -) -@click.option( - "--if_exists", - default="raise", - help="[raise|update|replace|pass] if dataset alread exists", -) -@click.option( - "--dataset_is_public", - default=True, - help="Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public.", -) -@click.option( - "--location", - default=None, - help="Location of dataset data. List of possible region names locations: https://cloud.google.com/bigquery/docs/locations", -) -@click.pass_context -def create_dataset(ctx, dataset_id, mode, if_exists, dataset_is_public, location): - """ - Create dataset on BigQuery. 
- """ - - Dataset(dataset_id=dataset_id, **ctx.obj).create( - mode=mode, - if_exists=if_exists, - dataset_is_public=dataset_is_public, - location=location, - ) - - click.echo( - click.style( - mode_text(mode, "created", dataset_id), - fg="green", - ) - ) - - -@cli_dataset.command(name="update", help="Update dataset on BigQuery") -@click.argument("dataset_id") -@click.option( - "--mode", "-m", default="all", help="What datasets to create [prod|staging|all]" -) -@click.pass_context -def update_dataset(ctx, dataset_id, mode, location): - """ - Update dataset on BigQuery. - """ - - Dataset(dataset_id=dataset_id, **ctx.obj).update(mode=mode, location=location) - - click.echo( - click.style( - mode_text(mode, "updated", dataset_id), - fg="green", - ) - ) - - -@cli_dataset.command(name="publicize", help="Make a dataset public") -@click.argument("dataset_id") -@click.option( - "--dataset_is_public", - default=True, - help="Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public.", -) -@click.pass_context -def publicize_dataset(ctx, dataset_id, dataset_is_public): - """ - Publicize dataset. - """ - - Dataset(dataset_id=dataset_id, **ctx.obj).publicize( - dataset_is_public=dataset_is_public - ) - - click.echo( - click.style( - f"Dataset `{dataset_id}` became public!", - fg="green", - ) - ) - - -@cli_dataset.command(name="delete", help="Delete dataset") -@click.argument("dataset_id") -@click.option( - "--mode", "-m", default="all", help="What datasets to create [prod|staging|all]" -) -@click.pass_context -def delete_dataset(ctx, dataset_id, mode): - """ - Delete dataset. - """ - - if click.confirm(f"Are you sure you want to delete `{dataset_id}`?"): - - Dataset(dataset_id=dataset_id, **ctx.obj).delete(mode=mode) - - click.echo( - click.style( - mode_text(mode, "deleted", dataset_id), - fg="green", - ) - ) - - -@click.group(name="table") -def cli_table(): - """ - Command to manage tables. 
- """ - - -@cli_table.command(name="init", help="Create metadata files") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--data_sample_path", - default=None, - help="Sample data used to pre-fill metadata", - type=click.Path(exists=True), -) -@click.option( - "--if_folder_exists", - default="raise", - help="[raise|replace|pass] actions if table folder exists", -) -@click.option( - "--if_table_config_exists", - default="raise", - help="[raise|replace|pass] actions if table config files already exist", -) -@click.option( - "--source_format", - default="csv", - help="Data source format. Only 'csv' is supported. Defaults to 'csv'.", -) -@click.option( - "--force_columns", - default=False, - help="Overwrite columns with local columns.", -) -@click.option( - "--columns_config_url_or_path", - default=None, - help="google sheets URL. Must be in the format https://docs.google.com/spreadsheets/d//edit#gid=. The sheet must contain the column name: 'coluna' and column description: 'descricao'.", -) -@click.pass_context -def init_table( - ctx, - dataset_id, - table_id, - data_sample_path, - if_folder_exists, - if_table_config_exists, - source_format, - force_columns, - columns_config_url_or_path, -): - """ - Initialize table metadata. 
- """ - - t = Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).init( - data_sample_path=data_sample_path, - if_folder_exists=if_folder_exists, - if_table_config_exists=if_table_config_exists, - source_format=source_format, - force_columns=force_columns, - columns_config_url_or_path=columns_config_url_or_path, - ) - - click.echo( - click.style( - f"Table `{table_id}` folder and metadata were created at {t.metadata_path}{dataset_id}", - fg="green", - ) - ) - - -@cli_table.command(name="create", help="Create stagging table in BigQuery") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--path", - "-p", - type=click.Path(exists=True), - default=None, - help="Path of data folder or file.", -) -@click.option( - "--if_table_exists", - default="raise", - help="[raise|replace|pass] actions if table exists", -) -@click.option( - "--force_dataset", - default=True, - help="Whether to automatically create the dataset folders and in BigQuery", -) -@click.option( - "--if_storage_data_exists", - default="raise", - help="[raise|replace|pass] actions if table data already exists at Storage", -) -@click.option( - "--if_table_config_exists", - default="raise", - help="[raise|replace|pass] actions if table config files already exist", -) -@click.option( - "--source_format", - default="csv", - help="Data source format. Only 'csv' is supported. Defaults to 'csv'.", -) -@click.option( - "--force_columns", - default=False, - help="Overwrite columns with local columns.", -) -@click.option( - "--columns_config_url_or_path", - default=None, - help="Path to the local architeture file or a public google sheets URL. Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats. Google sheets URL must be in the format https://docs.google.com/spreadsheets/d//edit#gid=.", -) -@click.option( - "--dataset_is_public", - default=True, - help="Control if prod dataset is public or not. 
By default staging datasets like `dataset_id_staging` are not public.", -) -@click.option( - "--location", - default=None, - help="Location of dataset data. List of possible region names locations: https://cloud.google.com/bigquery/docs/locations", -) -@click.option( - "--chunk_size", - default=None, - help="The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.", -) -@click.pass_context -def create_table( - ctx, - dataset_id, - table_id, - path, - if_table_exists, - force_dataset, - if_storage_data_exists, - if_table_config_exists, - source_format, - force_columns, - columns_config_url_or_path, - dataset_is_public, - location, - chunk_size, -): - - """ - Create staging table in BigQuery. - """ - - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).create( - path=path, - if_table_exists=if_table_exists, - force_dataset=force_dataset, - if_storage_data_exists=if_storage_data_exists, - if_table_config_exists=if_table_config_exists, - source_format=source_format, - force_columns=force_columns, - columns_config_url_or_path=columns_config_url_or_path, - dataset_is_public=dataset_is_public, - location=location, - chunk_size=chunk_size, - ) - - click.echo( - click.style( - f"Table `{dataset_id}_staging.{table_id}` was created in BigQuery", - fg="green", - ) - ) - - -@cli_table.command(name="update", help="Update tables in BigQuery") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--mode", - default="all", - help="Choose a table from a dataset to update [prod|staging|all]", -) -@click.pass_context -def update_table(ctx, dataset_id, table_id, mode): - """ - Update tables in BigQuery. 
- """ - - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).update( - mode=mode, - ) - - click.echo( - click.style( - f"All tables `{dataset_id}*.{table_id}` were updated in BigQuery", - fg="green", - ) - ) - - -@cli_table.command( - name="update_columns", help="Update columns fields in tables_config.yaml " -) -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--columns_config_url_or_path", - default=None, - help="""\nFills columns in table_config.yaml automatically using a public google sheets URL or a local file. Also regenerate - \npublish.sql and autofill type using bigquery_type.\n - - \nThe sheet must contain the columns:\n - - name: column name\n - - description: column description\n - - bigquery_type: column bigquery type\n - - measurement_unit: column mesurement unit\n - - covered_by_dictionary: column related dictionary\n - - directory_column: column related directory in the format .:\n - - temporal_coverage: column temporal coverage\n - - has_sensitive_data: the column has sensitive data\n - - observations: column observations\n - \nArgs:\n - \ncolumns_config_url_or_path (str): Path to the local architeture file or a public google sheets URL.\n - Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats.\n - Google sheets URL must be in the format https://docs.google.com/spreadsheets/d//edit#gid=.\n -""", -) -@click.pass_context -def update_columns(ctx, dataset_id, table_id, columns_config_url_or_path): - """ - Update columns fields in tables_config.yaml - """ - - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).update_columns( - columns_config_url_or_path=columns_config_url_or_path, - ) - - click.echo( - click.style( - f"Columns from `{dataset_id}.{table_id}` were updated in table_config.yaml", - fg="green", - ) - ) - - -@cli_table.command(name="publish", help="Publish staging table to prod") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--if_exists", - 
default="raise", - help="[raise|replace] actions if table exists", -) -@click.pass_context -def publish_table(ctx, dataset_id, table_id, if_exists): - """ - Publish staging table to prod. - """ - - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).publish( - if_exists=if_exists, - ) - - click.echo( - click.style( - f"Table `{dataset_id}.{table_id}` was published in BigQuery", - fg="green", - ) - ) - - -@cli_table.command(name="delete", help="Delete BigQuery table") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option("--mode", help="Which table to delete [prod|staging]", required=True) -@click.pass_context -def delete_table(ctx, dataset_id, table_id, mode): - """ - Delete BigQuery table. - """ - - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).delete( - mode=mode, - ) - - -@cli_table.command(name="append", help="Append new data to existing table") -@click.argument("dataset_id") -@click.argument("table_id") -@click.argument("filepath", type=click.Path(exists=True)) -@click.option("--partitions", help="Data partition as `value=key/value2=key2`") -@click.option( - "--if_exists", - default="raise", - help="[raise|replace|pass] if file alread exists", -) -@click.option( - "--chunk_size", - default=None, - help="The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.", -) -@click.pass_context -def upload_table( - ctx, dataset_id, table_id, filepath, partitions, if_exists, chunk_size -): - - """ - Upload data to BigQuery table. - """ - Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).append( - filepath=filepath, - partitions=partitions, - if_exists=if_exists, - chunk_size=chunk_size, - ) - - click.echo( - click.style( - f"Data was added to `{dataset_id}.{table_id}`", - fg="green", - ) - ) - - -@click.group(name="storage") -def cli_storage(): - """ - Commands for Google Cloud Storage. 
- """ - - -@cli_storage.command(name="init", help="Create bucket and initial folders") -@click.option("--bucket_name", default="basedosdados", help="Bucket name") -@click.option( - "--replace", - is_flag=True, - help="Whether to replace current bucket files", -) -@click.option( - "--very-sure/--not-sure", - default=False, - help="Are you sure that you want to replace current bucket files?", -) -@click.pass_context -def init_storage(ctx, bucket_name, replace, very_sure): - """ - Initialize bucket and folders. - """ - - # TODO: Create config file to store bucket_name, etc... - ctx.obj.pop("bucket_name") - Storage(bucket_name=bucket_name, **ctx.obj).init( - replace=replace, very_sure=very_sure - ) - - click.echo( - click.style( - f"Bucket `{bucket_name}` was created", - fg="green", - ) - ) - - -@cli_storage.command(name="upload", help="Upload file to bucket") -@click.argument("dataset_id") -@click.argument("table_id") -@click.argument("filepath", type=click.Path(exists=True)) -@click.option( - "--mode", - "-m", - required=True, - help="Where to save the file [raw|staging|header|auxiliary_files|architecture|all]", -) -@click.option("--partitions", help="Data partition as `value=key/value2=key2`") -@click.option( - "--if_exists", - default="raise", - help="[raise|replace|pass] if file alread exists", -) -@click.option( - "--chunk_size", - default=None, - help="The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification.", -) -@click.pass_context -def upload_storage( - ctx, dataset_id, table_id, filepath, mode, partitions, if_exists, chunk_size -): - - ''' - Upload file to bucket. 
- ''' - - ctx.obj.pop("bucket_name") - Storage(dataset_id, table_id, **ctx.obj).upload( - filepath=filepath, - mode=mode, - partitions=partitions, - if_exists=if_exists, - chunk_size=chunk_size, - ) - - click.echo( - click.style( - "Data was added", - fg="green", - ) - ) - - -@cli_storage.command(name="download", help="Download file from bucket") -@click.argument("dataset_id") -@click.argument("table_id") -@click.argument("savepath", type=click.Path(exists=True)) -@click.option( - "--filename", - "-f", - default="*", - help="filename to download single file. If * downloads all files from bucket folder", -) -@click.option( - "--mode", - "-m", - default="raw", - help="Where to download data from [raw|staging|header|auxiliary_files|architecture]", -) -@click.option("--partitions", help="Data partition as `value=key/value2=key2`") -@click.option( - "--if_not_exists", - default="raise", - help="[raise|pass] if file file not found at bucket folder", -) -@click.pass_context -def download_storage( - ctx, dataset_id, table_id, filename, savepath, partitions, mode, if_not_exists -): - """ - Download file from bucket. 
- """ - Storage(dataset_id, table_id, **ctx.obj).download( - filename, savepath, partitions, mode, if_not_exists - ) - - click.echo( - click.style( - f"Data was downloaded to `{savepath}`", - fg="green", - ) - ) - - -@cli_storage.command(name="delete_table", help="Delete table from bucket") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option( - "--mode", - "-m", - required=True, - default="staging", - help="Where to delete the file from [raw|staging|header|auxiliary_files|architecture]", -) -@click.option( - "--bucket_name", - default=None, - help="Bucket from which to delete data, you can change it to delete from a bucket other than yours", -) -@click.option("--not_found_ok", default=False, help="what to do if table not found") -@click.pass_context -def storage_delete_table(ctx, dataset_id, table_id, mode, not_found_ok, bucket_name): - """ - Delete table from bucket. - """ - Storage(dataset_id, table_id, **ctx.obj).delete_table( - mode=mode, not_found_ok=not_found_ok, bucket_name=bucket_name - ) - click.echo( - click.style( - f"Data was deleted from bucket `{bucket_name}`", - fg="green", - ) - ) - - -@cli_storage.command(name="copy_table", help="Copy table to your bucket") -@click.argument("dataset_id") -@click.argument("table_id") -@click.option("--source_bucket_name", required=True, default="basedosdados") -@click.option( - "--dst_bucket_name", - default=None, - help="Bucket where data will be copied to, defaults to your bucket", -) -@click.option( - "--mode", - "-m", - default="staging", - help="which bucket folder to get the table [raw|staging|header|auxiliary_files|architecture]", -) -@click.pass_context -def storage_copy_table( - ctx, dataset_id, table_id, source_bucket_name, dst_bucket_name, mode -): - """ - Copy table from another bucket. 
- """ - Storage(dataset_id, table_id, **ctx.obj).copy_table( - source_bucket_name=source_bucket_name, - destination_bucket_name=dst_bucket_name, - mode=mode, ) @@ -848,174 +179,6 @@ def cli_get_table_columns( ) -@click.group(name="metadata") -def cli_metadata(): - """ - CLI metadata commands. - """ - - -@cli_metadata.command(name="create", help="Creates new metadata config file") -@click.argument("dataset_id") -@click.argument("table_id", required=False) -@click.option( - "--if_exists", - default="raise", - help="[raise|replace|pass] if metadata config file alread exists", -) -@click.option( - "--columns", - default=[], - help="Data columns. Example: --columns=col1,col2", - callback=lambda _, __, x: x.split(",") if x else [], -) -@click.option( - "--partition_columns", - default=[], - help="Columns that partition the data. Example: --partition_columns=col1,col2", - callback=lambda _, __, x: x.split(",") if x else [], -) -@click.option( - "--force_columns", - default=False, - help="Overwrite columns with local columns.", -) -@click.option( - "--table_only", - default=True, - help=( - "Force the creation of `table_config.yaml` file only if `dataset_conf" - "ig.yaml` doesn't exist." - ), -) -@click.pass_context -def cli_create_metadata( - ctx, - dataset_id, - table_id, - if_exists, - columns, - partition_columns, - force_columns, - table_only, -): - """ - Creates new metadata config file. 
- """ - - m = Metadata(dataset_id, table_id, **ctx.obj).create( - if_exists=if_exists, - columns=columns, - partition_columns=partition_columns, - force_columns=force_columns, - table_only=table_only, - ) - - click.echo( - click.style( - f"Metadata file was created at `{m.filepath}`", - fg="green", - ) - ) - - -@cli_metadata.command( - name="is_updated", help="Check if user's local metadata is updated" -) -@click.argument("dataset_id") -@click.argument("table_id", required=False) -@click.pass_context -def cli_is_updated_metadata(ctx, dataset_id, table_id): - """ - Check if user's local metadata is updated. - """ - m = Metadata(dataset_id, table_id, **ctx.obj) - - if m.is_updated(): - msg, color = "Local metadata is updated.", "green" - else: - msg = ( - "Local metadata is out of date. Please run `basedosdados metadata" - " create` with the flag `if_exists=replace` to get the updated da" - "ta." - ) - color = "red" - - click.echo(click.style(msg, fg=color)) - - -@cli_metadata.command(name="validate", help="Validate user's local metadata") -@click.argument("dataset_id") -@click.argument("table_id", required=False) -@click.pass_context -def cli_validate_metadata(ctx, dataset_id, table_id): - """ - Validate user's local metadata. - """ - m = Metadata(dataset_id, table_id, **ctx.obj) - - try: - m.validate() - msg, color = "Local metadata is valid.", "green" - except BaseDosDadosException as e: - msg = ( - f"Local metadata is invalid. Please check the traceback below for" - f" more information on how to fix it:\n\n{repr(e)}" - ) - color = "red" - - click.echo(click.style(msg, fg=color)) - - -@cli_metadata.command(name="publish", help="Publish user's local metadata") -@click.argument("dataset_id") -@click.argument("table_id", required=False) -@click.option( - "--all", - default=False, - help=( - "Force the publishment of metadata specified in both `dataset_config." - "yaml` and `table_config.yaml` at once." 
- ), -) -@click.option( - "--if_exists", - default="raise", - help=("Define what to do in case metadata already exists in CKAN."), -) -@click.option( - "--update_locally", - default=False, - help=("Update local metadata with the new CKAN metadata on publish."), -) -@click.pass_context -def cli_publish_metadata( - ctx, - dataset_id, - table_id, - all, - if_exists, - update_locally, -): - """ - Publish user's local metadata. - """ - m = Metadata(dataset_id, table_id, **ctx.obj) - - try: - m.publish(all=all, if_exists=if_exists, update_locally=update_locally) - msg, color = "Local metadata has been published.", "green" - except (CKANAPIError, BaseDosDadosException, AssertionError) as e: - msg = ( - f"Local metadata couldn't be published due to an error. Pleas" - f"e check the traceback below for more information on how to " - f"fix it:\n\n{repr(e)}" - ) - color = "red" - - click.echo(click.style(msg, fg=color)) - - @click.group(name="config") def cli_config(): """ @@ -1038,16 +201,6 @@ def init(ctx, overwrite): Base(overwrite_cli_config=overwrite, **ctx.obj) -@cli_config.command(name="refresh_template", help="Overwrite current templates") -@click.pass_context -def init_refresh_templates(ctx): - """ - Initialize configuration. - """ - - Base(**ctx.obj)._refresh_templates() - - # Allow anomalous backslash in string: '\ ' (it's used in gcloud sdk) # pylint: disable=W1401 @click.command( @@ -1055,7 +208,7 @@ def init_refresh_templates(ctx): help=( "Downloads data do SAVEPATH. 
SAVEPATH must point to a .csv file.\n\n" "Example: \n\n" - 'basedosdados download data.csv --query="select * from basedosdados.br_ibge_pib.municipio limit 10" \ --billing_project_id=basedosdados-dev' + 'basedosdados download data.csv --query="select * from basedosdados.br_ibge_pib.municipio limit 10" --billing_project_id=basedosdados-dev' ), ) @click.argument( @@ -1137,17 +290,12 @@ def cli_reauth(): bd.reauth() -cli.add_command(cli_dataset) -cli.add_command(cli_table) -cli.add_command(cli_storage) cli.add_command(cli_config) cli.add_command(cli_download) cli.add_command(cli_reauth) cli.add_command(cli_list) cli.add_command(cli_get) -cli.add_command(cli_metadata) if __name__ == "__main__": - cli() diff --git a/python-package/basedosdados/configs/config.toml b/python-package/basedosdados/configs/config.toml index cb9817a26..59a364022 100644 --- a/python-package/basedosdados/configs/config.toml +++ b/python-package/basedosdados/configs/config.toml @@ -1,14 +1,7 @@ -# Where to save all metadata data, such as Dataset and Table config files. -# We reccomend this path to be the same as the github repo clone -metadata_path = "" - -# What is the bucket that you are saving all the data? It should be +# What is the bucket that you are saving all the data? It should be # an unique name. 
bucket_name = "" -# Where the templates to generate configs and descriptions sit -templates_path = "" - [gcloud-projects] [gcloud-projects.staging] @@ -19,13 +12,5 @@ templates_path = "" name = "" credentials_path = "" -# Data about you to auto-fill config -# TODO: issue 20 -[user] -name = "" -email = "" -website = "" - -[ckan] -url = "" -api_key = "" +[api] +url = "" \ No newline at end of file diff --git a/python-package/basedosdados/configs/templates/table/publish.sql b/python-package/basedosdados/configs/templates/table/publish.sql index be9e6d2f6..d7b86b4f1 100644 --- a/python-package/basedosdados/configs/templates/table/publish.sql +++ b/python-package/basedosdados/configs/templates/table/publish.sql @@ -20,7 +20,7 @@ TIPOS: */ {% set project = project_id_prod %} CREATE VIEW {{ project }}.{{ dataset_id }}.{{ table_id }} AS -SELECT +SELECT {% for column in columns|list + partition_columns|list -%} {%- if not loop.last -%} SAFE_CAST({{ column }} AS STRING) {{ column }}, diff --git a/python-package/basedosdados/constants.py b/python-package/basedosdados/constants.py index 6c54472ad..e8fec1c34 100644 --- a/python-package/basedosdados/constants.py +++ b/python-package/basedosdados/constants.py @@ -1,18 +1,19 @@ -''' +""" Constants for the project. -''' +""" # pylint: disable=C0103 __all__ = ["config", "constants"] -from enum import Enum from dataclasses import dataclass +from enum import Enum @dataclass class config: - ''' + """ Configuration for the project. - ''' + """ + verbose: bool = True billing_project_id: str = None project_config_path: str = None @@ -20,10 +21,16 @@ class config: class constants(Enum): - ''' + """ Constants for the project. 
- ''' + """ + ENV_CONFIG: str = "BASEDOSDADOS_CONFIG" ENV_CREDENTIALS_PREFIX: str = "BASEDOSDADOS_CREDENTIALS_" ENV_CREDENTIALS_PROD: str = "BASEDOSDADOS_CREDENTIALS_PROD" ENV_CREDENTIALS_STAGING: str = "BASEDOSDADOS_CREDENTIALS_STAGING" + TOKEN_FILE: str = ".token.json" + TOKEN_URL: str = "/api/token/" + REFRESH_TOKEN_URL: str = "/api/token/refresh/" + VERIFY_TOKEN_URL: str = "/api/token/verify/" + TEST_ENDPOINT: str = "/api/v1/private/bigquerytypes/" diff --git a/python-package/basedosdados/download/base.py b/python-package/basedosdados/download/base.py index 2b7628515..66a209417 100644 --- a/python-package/basedosdados/download/base.py +++ b/python-package/basedosdados/download/base.py @@ -1,15 +1,13 @@ -''' +""" Functions for manage auth and credentials -''' -# pylint: disable=redefined-outer-name, protected-access, no-name-in-module, import-error,line-too-long -from functools import lru_cache - +""" import sys -from google.cloud import bigquery, storage +# pylint: disable=redefined-outer-name, protected-access, no-name-in-module, import-error,line-too-long +from functools import lru_cache import pydata_google_auth - +from google.cloud import bigquery, storage from basedosdados.upload.base import Base @@ -19,9 +17,9 @@ def reauth(): - ''' + """ Reauth user credentials - ''' + """ pydata_google_auth.get_user_credentials( SCOPES, credentials_cache=pydata_google_auth.cache.REAUTH @@ -29,11 +27,11 @@ def reauth(): def credentials(from_file=False, reauth=False): - ''' + """ Get user credentials - ''' + """ - #check if is running in colab + # check if is running in colab if "google.colab" in sys.modules: from google.colab import auth # pylint: disable=import-outside-toplevel @@ -49,16 +47,15 @@ def credentials(from_file=False, reauth=False): ) return pydata_google_auth.get_user_credentials( - SCOPES, + SCOPES, ) - @lru_cache(256) def google_client(billing_project_id, from_file, reauth): - ''' + """ Get Google Cloud client for bigquery and storage - ''' + """ return 
dict( bigquery=bigquery.Client( diff --git a/python-package/basedosdados/download/download.py b/python-package/basedosdados/download/download.py index d0f1e5219..af0d58a2e 100644 --- a/python-package/basedosdados/download/download.py +++ b/python-package/basedosdados/download/download.py @@ -1,30 +1,30 @@ """ Functions for managing downloads """ -# pylint: disable=too-many-arguments, fixme, invalid-name, protected-access,line-too-long -from pathlib import Path -from functools import partialmethod +import gzip +import os import re -import time import shutil -import os -import gzip +import time +from functools import partialmethod + +# pylint: disable=too-many-arguments, fixme, invalid-name, protected-access,line-too-long +from pathlib import Path -from pydata_google_auth.exceptions import PyDataCredentialsError -from google.cloud import bigquery_storage_v1 -from google.cloud import bigquery import pandas_gbq +from google.cloud import bigquery, bigquery_storage_v1 from pandas_gbq.gbq import GenericGBQException +from pydata_google_auth.exceptions import PyDataCredentialsError -from basedosdados.download.base import google_client, credentials +from basedosdados.constants import config +from basedosdados.download.base import credentials, google_client from basedosdados.exceptions import ( - BaseDosDadosException, BaseDosDadosAccessDeniedException, BaseDosDadosAuthorizationException, + BaseDosDadosException, BaseDosDadosInvalidProjectIDException, BaseDosDadosNoBillingProjectIDException, ) -from basedosdados.constants import config def _set_config_variables(billing_project_id, from_file): @@ -151,11 +151,10 @@ def read_table( if (dataset_id is not None) and (table_id is not None): query = f""" - SELECT * + SELECT * FROM `{query_project_id}.{dataset_id}.{table_id}`""" if limit is not None: - query += f" LIMIT {limit}" else: raise BaseDosDadosException("Both table_id and dataset_id should be filled.") @@ -244,7 +243,7 @@ def download( not _is_table(client, dataset_id, 
table_id, query_project_id) or limit ): query = f""" - SELECT * + SELECT * FROM {query_project_id}.{dataset_id}.{table_id} """ diff --git a/python-package/basedosdados/download/metadata.py b/python-package/basedosdados/download/metadata.py index f1abf7b4b..0861da8b5 100644 --- a/python-package/basedosdados/download/metadata.py +++ b/python-package/basedosdados/download/metadata.py @@ -1,13 +1,15 @@ -''' +""" Functions to get metadata from BD's API -''' +""" +import math + # pylint: disable=invalid-name,use-maxsplit-arg,line-too-long from collections import defaultdict -import math -from google.cloud import bigquery import pandas as pd import requests +from google.cloud import bigquery + def _safe_fetch(url: str): """ @@ -47,10 +49,9 @@ def _dict_from_page(json_response): def _fix_size(s, step=80): - final = "" - for l in s.split(" "): + for l in s.split(" "): # noqa final += (l + " ") if len(final.split("\n")[-1]) < step else "\n" return final @@ -82,7 +83,7 @@ def _handle_output(verbose, output_type, df, col_name=None): col_name (str): name of column with id's data """ - df_is_dataframe = isinstance(df,pd.DataFrame) + df_is_dataframe = isinstance(df, pd.DataFrame) df_is_bq_dataset_or_table = isinstance(df, bigquery.Table) df_is_bq_dataset_or_table |= isinstance(df, bigquery.Dataset) @@ -103,6 +104,7 @@ def _handle_output(verbose, output_type, df, col_name=None): raise ValueError(msg) raise TypeError("`verbose` argument must be of `bool` type.") + def list_datasets(with_description=False, verbose=True): """ This function uses `bd_dataset_search` website API @@ -164,7 +166,9 @@ def list_datasets(with_description=False, verbose=True): } for k in range(len(dataset_dict["dataset_id"])) ] - raise ValueError("`verbose` and `with_description` argument must be of `bool` type.") + raise ValueError( + "`verbose` and `with_description` argument must be of `bool` type." 
+ ) def list_dataset_tables( @@ -231,7 +235,9 @@ def list_dataset_tables( for k in range(len(table_dict["table_id"])) ] - raise ValueError("`verbose` and `with_description` argument must be of `bool` type.") + raise ValueError( + "`verbose` and `with_description` argument must be of `bool` type." + ) def get_dataset_description( @@ -301,7 +307,6 @@ def get_table_columns( table_id, verbose=True, ): - """ Fetch the names, types and descriptions for the columns in the specified table. Prints information on screen. diff --git a/python-package/basedosdados/exceptions.py b/python-package/basedosdados/exceptions.py index 0c7698cdc..0a956edbf 100644 --- a/python-package/basedosdados/exceptions.py +++ b/python-package/basedosdados/exceptions.py @@ -2,7 +2,8 @@ Exception classes for the package. """ -#pylint: disable=C0301 +# pylint: disable=C0301 + class BaseDosDadosException(Exception): """Exclusive Exception from Base dos Dados""" @@ -83,3 +84,7 @@ def __init__(self): "authorization code." ) super().__init__(self.message) + + +class BaseDosDadosMissingDependencyException(BaseDosDadosException): + """Exception raised if one of the optional dependencies is missing.""" diff --git a/python-package/basedosdados/schemas/columns_schema.json b/python-package/basedosdados/schemas/columns_schema.json new file mode 100644 index 000000000..115cce55a --- /dev/null +++ b/python-package/basedosdados/schemas/columns_schema.json @@ -0,0 +1,226 @@ +{ + "help": "https://basedosdados.org/api/3/action/help_show?name=bd_bdm_columns_schema", + "success": true, + "result": { + "title": "BdmColumns", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome em produção", + "yaml_order": { + "id_before": null, + "id_after": "bigquery_type" + }, + "type": "string" + }, + "bigquery_type": { + "title": "Tipo no BigQuery", + "description": "Tipo no BigQuery.\nOpções: string, int64, float64, date, time, geometry.Ver 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types.", + "yaml_order": { + "id_before": "name", + "id_after": "description" + }, + "allOf": [ + { + "$ref": "#/definitions/BigQueryTypeEnum" + } + ] + }, + "description": { + "title": "Descrição", + "description": "Descrição", + "yaml_order": { + "id_before": "bigquery_type", + "id_after": "temporal_coverage" + }, + "type": "string" + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nPreenchido como lista de intervalos sem repetir os metadados da tabela.Exemplo: 2001(1)2010, ou (1)2020, ou (1).", + "yaml_order": { + "id_before": "description", + "id_after": "covered_by_dictionary" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "covered_by_dictionary": { + "title": "Coberta por um Dicionário", + "description": "A coluna precisa de dicionário?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "directory_column" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "directory_column": { + "title": "Coluna Correspondente nos Diretórios", + "description": "Chave primária nos diretórios correspondente à coluna.", + "yaml_order": { + "id_before": "covered_by_dictionary", + "id_after": "measurement_unit" + }, + "allOf": [ + { + "$ref": "#/definitions/DirectoryColumn" + } + ] + }, + "measurement_unit": { + "title": "Unidade de Medida", + "description": "Qual é a unidade de medida da coluna?\nEscreva a fórmula matemática baseada nas chaves de unidades básicas permitidas em https://basedosdados.org/api/3/action/bd_available_options na seção Measurement Unit.\nExemplos: 'kilometer^2', 'meter^3 / second', '1000 * person', 'gigawatt'.", + "yaml_order": { + "id_before": "directory_column", + "id_after": "has_sensitive_data" + }, + "type": "string" + }, + "has_sensitive_data": { + "title": "Contém Dados Sensíveis (LGPD)", + "description": "A 
coluna contém dados sensíveis, como definido pela Lei Geral de Proteção de Dados (LGPD)?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "measurement_unit", + "id_after": "observations" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "observations": { + "title": "Observações", + "description": "Informações sobre a coluna: arquitetura, decisões de limpeza, etc.", + "yaml_order": { + "id_before": "has_sensitive_data", + "id_after": "is_in_staging" + }, + "type": "string" + }, + "is_in_staging": { + "title": "Está em Staging", + "description": "A coluna está na tabela staging?\nOpções: True, False", + "default": true, + "yaml_order": { + "id_before": "observations", + "id_after": "is_partition" + }, + "type": "boolean" + }, + "is_partition": { + "title": "É Partição", + "description": "A coluna é uma partição?\nOpções: True, False", + "default": false, + "yaml_order": { + "id_before": "is_in_staging", + "id_after": null + }, + "type": "boolean" + } + }, + "required": [ + "name" + ], + "definitions": { + "BigQueryTypeEnum": { + "title": "BigQueryTypeEnum", + "description": "An enumeration.", + "enum": [ + "array", + "boolean", + "date", + "datetime", + "float64", + "geography", + "int64", + "numeric", + "string", + "struct", + "time", + "timestamp" + ], + "type": "string" + }, + "TemporalCoverageEnum": { + "title": "TemporalCoverageEnum", + "type": "array", + "items": { + "type": "string" + } + }, + "YesNoEnum": { + "title": "YesNoEnum", + "description": "An enumeration.", + "enum": [ + "yes", + "no" + ], + "type": "string" + }, + "DirectoryEnum": { + "title": "DirectoryEnum", + "description": "An enumeration.", + "enum": [ + "br_bd_diretorios_africa_sul", + "br_bd_diretorios_alemanha", + "br_bd_diretorios_argentina", + "br_bd_diretorios_australia", + "br_bd_diretorios_bolivia", + "br_bd_diretorios_brasil", + "br_bd_diretorios_canada", + "br_bd_diretorios_chile", + "br_bd_diretorios_china", + "br_bd_diretorios_colombia", 
+ "br_bd_diretorios_data_tempo", + "br_bd_diretorios_dinamarca", + "br_bd_diretorios_espanha", + "br_bd_diretorios_franca", + "br_bd_diretorios_india", + "br_bd_diretorios_inglaterra", + "br_bd_diretorios_eua", + "br_bd_diretorios_mexico", + "br_bd_diretorios_mundo", + "br_bd_diretorios_noruega", + "br_bd_diretorios_peru", + "br_bd_diretorios_portugal", + "br_bd_diretorios_suecia", + "br_bd_diretorios_suica" + ], + "type": "string" + }, + "DirectoryColumn": { + "title": "DirectoryColumn", + "type": "object", + "properties": { + "dataset_id": { + "title": "ID Conjunto", + "allOf": [ + { + "$ref": "#/definitions/DirectoryEnum" + } + ] + }, + "table_id": { + "title": "ID Tabela", + "type": "string" + }, + "column_name": { + "title": "Nome Coluna", + "type": "string" + } + } + } + } + } +} \ No newline at end of file diff --git a/python-package/basedosdados/schemas/dataset_schema.json b/python-package/basedosdados/schemas/dataset_schema.json new file mode 100644 index 000000000..0bee5f76e --- /dev/null +++ b/python-package/basedosdados/schemas/dataset_schema.json @@ -0,0 +1,2127 @@ +{ + "help": "https://basedosdados.org/api/3/action/help_show?name=bd_dataset_schema", + "success": true, + "result": { + "title": "Dataset", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "dataset_id": { + "title": "ID", + "description": "UUID do conjunto de dados (será preenchido automaticamente).", + "yaml_order": { + "id_before": null, + "id_after": "dataset_slug" + }, + "type": "string" + }, + "dataset_slug": { + "title": "Slug", + "description": "Nome (slug) do conjunto\nExemplos: br_ibge_populacao, br_tse_eleicoes", + "yaml_order": { + "id_before": "dataset_id", + "id_after": "name" + }, + "type": "string" + }, + "name": { + "title": "Nome", + "description": "Nome do conjunto, a ser exibido no mecanismo de busca.\nExemplo: População brasileira", + "yaml_order": { + "id_before": "dataset_slug", + "id_after": "organization" + }, + "type": 
"string" + }, + "organization": { + "title": "ID Organização", + "description": "Qual organização disponibiliza os dados originais?\nOpções: escolher dessa lista -> https://basedosdados.org/api/3/action/organization_list\nSe a organização não estiver na lista acima ou o nome não estiver conforme o manual de estilo\ncriar ou renomear a organização em https://basedosdados.org/organization/\nExemplos: br-ibge, br-tse, br-rj-gov", + "yaml_order": { + "id_before": "name", + "id_after": "description" + } + }, + "description": { + "title": "Descrição", + "description": "Descrição do conjunto", + "yaml_order": { + "id_before": "organization", + "id_after": "themes" + }, + "type": "string" + }, + "type": { + "title": "Tipo", + "enum": [ + "dataset" + ], + "type": "string" + }, + "author": { + "title": "Author", + "type": "string" + }, + "author_email": { + "title": "Author Email", + "type": "string" + }, + "maintainer": { + "title": "Maintainer", + "type": "string" + }, + "maintainer_email": { + "title": "Maintainer Email", + "type": "string" + }, + "state": { + "title": "Estado", + "enum": [ + "active", + "draft", + "deleted" + ], + "type": "string" + }, + "license_id": { + "title": "ID da Licença", + "type": "string" + }, + "url": { + "title": "Url", + "type": "string" + }, + "version": { + "title": "Versão", + "type": "string" + }, + "metadata_created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data de Modificação", + "description": "Não altere esse campo.\nData da última modificação dos metadados gerada automaticamente pelo CKAN.", + "yaml_order": { + "id_before": "tags", + "id_after": null + }, + "type": "string", + "format": "date-time" + }, + "creator_user_id": { + "title": "ID do(a) Usuário(a) Criador(a)", + "type": "string", + "format": "uuid" + }, + "private": { + "title": "Privado", + "type": "boolean" + }, + "license_title": { + "title": "Título da Licença", + "type": "string" + }, + 
"num_resources": { + "title": "Número de Recursos", + "type": "integer" + }, + "resources": { + "title": "Resources", + "default": [], + "type": "array", + "items": { + "discriminator": { + "propertyName": "resource_type", + "mapping": { + "external_link": "#/definitions/ExternalLink", + "bdm_table": "#/definitions/BdmTable", + "information_request": "#/definitions/InformationRequest", + "bdm_dictionary": "#/definitions/BdmDictionary" + } + }, + "oneOf": [ + { + "$ref": "#/definitions/ExternalLink" + }, + { + "$ref": "#/definitions/BdmTable" + }, + { + "$ref": "#/definitions/InformationRequest" + }, + { + "$ref": "#/definitions/BdmDictionary" + } + ] + } + }, + "themes": { + "title": "Temas", + "description": "Quais temas caracterizam a base?\nOpções: escolher dessa lista -> https://basedosdados.org/api/3/action/group_list\nImportante: preencher com a chave, e não o valor.", + "yaml_order": { + "id_before": "description", + "id_after": "tags" + } + }, + "owner_org": { + "title": "Owner Org", + "type": "string", + "format": "uuid" + }, + "num_tags": { + "title": "Num Tags", + "type": "integer" + }, + "tags": { + "title": "Etiquetas", + "description": "Quais etiquetas caracterizam a base?\nOpções: escolher dessa lista -> https://basedosdados.org/api/3/action/tag_list\nExemplos:\n - fertilidade\n - preco\n - desmatamento\nCaso crie etiquetas novas, as regras são:\n - letras minúsculas\n - sem acentos\n - sempre no singular\n - não repita nomes de grupos (ex. 
educacao, saude, meio ambiente, economia, etc.)", + "yaml_order": { + "id_before": "themes", + "id_after": "metadata_modified" + } + }, + "relationships_as_object": { + "title": "Relationships As Object" + }, + "relationships_as_subject": { + "title": "Relationships As Subject" + }, + "action__": { + "title": "Action ", + "enum": [ + "package_show", + "package_create", + "package_update" + ], + "type": "string" + }, + "short_description": { + "title": "Descrição curta", + "description": "Descrição curta (até 280 caracteres) do conjunto.", + "type": "string" + }, + "ckan_url": { + "title": "Url CKAN", + "description": "Url completa do CKAN já contendo o dataset-id\nExemplo: https://basedosdados.org/dataset/", + "type": "string" + }, + "github_url": { + "title": "Url Github", + "description": "Url completa do Github já contendo o dataset_id\nExemplo: https://github.com/basedosdados/mais/tree/master/bases/", + "type": "string" + }, + "visibility": { + "title": "Visibilidade" + }, + "cache_last_updated": { + "title": "Cache Last Updated", + "type": "string", + "format": "date-time" + }, + "isopen": { + "title": "Isopen", + "type": "boolean" + } + }, + "required": [ + "name", + "type", + "private", + "owner_org" + ], + "definitions": { + "LanguageEnum": { + "title": "LanguageEnum", + "description": "An enumeration.", + "enum": [ + "german", + "arabic", + "bahasa", + "bengali", + "chinese", + "spanish", + "french", + "hebrew", + "hindi", + "english", + "japanese", + "malay", + "portuguese", + "russian", + "thai", + "urdu" + ], + "type": "string" + }, + "YesNoEnum": { + "title": "YesNoEnum", + "description": "An enumeration.", + "enum": [ + "yes", + "no" + ], + "type": "string" + }, + "AvailabilityEnum": { + "title": "AvailabilityEnum", + "description": "An enumeration.", + "enum": [ + "online", + "physical", + "in_person" + ], + "type": "string" + }, + "CountryEnum": { + "title": "CountryEnum", + "description": "An enumeration.", + "enum": [ + "br", + "ar", + "bo", + 
"cl", + "co", + "ca", + "us", + "mx", + "de", + "es", + "fr", + "it", + "pt", + "gb", + "ru", + "cn", + "th", + "jp", + "my", + "id", + "za", + "au" + ], + "type": "string" + }, + "LicenseEnum": { + "title": "LicenseEnum", + "description": "An enumeration.", + "enum": [ + "odc_by", + "odbl", + "ppdl", + "cc_40", + "cc_by", + "cc_by_nc", + "cc_by_nd", + "cc_by_nc_nd", + "gnu_gpl_v1", + "gnu_gpl_v2", + "gnu_gpl_v3", + "mit" + ], + "type": "string" + }, + "SpatialCoverageArea": { + "title": "SpatialCoverageArea", + "type": "string" + }, + "TemporalCoverageEnum": { + "title": "TemporalCoverageEnum", + "type": "array", + "items": { + "type": "string" + } + }, + "TimeUnitEnum": { + "title": "TimeUnitEnum", + "description": "An enumeration.", + "enum": [ + "second", + "minute", + "hour", + "day", + "week", + "month", + "quarter", + "semester", + "one_year", + "two_years", + "three_years", + "four_years", + "five_years", + "ten_years", + "unique", + "recurring", + "uncertain", + "other" + ], + "type": "string" + }, + "EntityDateTimeEnum": { + "title": "EntityDateTimeEnum", + "description": "An enumeration.", + "enum": [ + "year", + "semester", + "quarter", + "bimester", + "month", + "week", + "day", + "hour", + "minute", + "second", + "date", + "time" + ], + "type": "string" + }, + "EntitySpatialEnum": { + "title": "EntitySpatialEnum", + "description": "An enumeration.", + "enum": [ + "continent", + "country", + "region", + "state", + "district", + "county", + "municipality", + "city", + "village", + "neighborhood", + "zip_code", + "census_tract" + ], + "type": "string" + }, + "EntityIndividualEnum": { + "title": "EntityIndividualEnum", + "description": "An enumeration.", + "enum": [ + "person", + "household", + "name", + "animal", + "plant" + ], + "type": "string" + }, + "EntityEstablishmentEnum": { + "title": "EntityEstablishmentEnum", + "description": "An enumeration.", + "enum": [ + "agency", + "protected_area", + "band", + "library", + "notary_office", + "school", + 
"legislature", + "police_station", + "company", + "station", + "stadium", + "terrorist_group", + "hospital", + "church", + "property", + "ministry", + "museum", + "construction", + "ngo", + "prison", + "team", + "court", + "store" + ], + "type": "string" + }, + "EntityPoliticsEnum": { + "title": "EntityPoliticsEnum", + "description": "An enumeration.", + "enum": [ + "agreement", + "speech", + "election", + "caucus", + "law", + "party", + "poll", + "vote" + ], + "type": "string" + }, + "EntityScienceEnum": { + "title": "EntityScienceEnum", + "description": "An enumeration.", + "enum": [ + "article", + "citation", + "domain", + "document", + "iceberg", + "book", + "newspaper", + "drug", + "patent", + "journal", + "word", + "post", + "langugage", + "crs", + "page", + "protein", + "meteor", + "terrain", + "typo" + ], + "type": "string" + }, + "EntityEconomicsEnum": { + "title": "EntityEconomicsEnum", + "description": "An enumeration.", + "enum": [ + "contract", + "donation", + "amendment", + "expenditure", + "item", + "grant", + "procurement", + "product", + "transaction", + "transfer", + "bill", + "occupation", + "sector" + ], + "type": "string" + }, + "EntityEducationEnum": { + "title": "EntityEducationEnum", + "description": "An enumeration.", + "enum": [ + "scholarship", + "exam" + ], + "type": "string" + }, + "EntityEventEnum": { + "title": "EntityEventEnum", + "description": "An enumeration.", + "enum": [ + "alert", + "attack", + "audit", + "act", + "concert", + "disinvitation", + "disaster", + "war", + "territorial_change", + "birth", + "death", + "request", + "protest", + "match", + "sanction" + ], + "type": "string" + }, + "EntityArtEnum": { + "title": "EntityArtEnum", + "description": "An enumeration.", + "enum": [ + "album", + "movie", + "photo", + "song", + "statue", + "painting", + "poem" + ], + "type": "string" + }, + "EntityInfrastructureEnum": { + "title": "EntityInfrastructureEnum", + "description": "An enumeration.", + "enum": [ + "dam", + 
"satellitte", + "street_road", + "roller_coaster" + ], + "type": "string" + }, + "EntityTransportationEnum": { + "title": "EntityTransportationEnum", + "description": "An enumeration.", + "enum": [ + "automobile", + "train", + "aircraft", + "ship" + ], + "type": "string" + }, + "EntitySecurityEnum": { + "title": "EntitySecurityEnum", + "description": "An enumeration.", + "enum": [ + "gun" + ], + "type": "string" + }, + "EntityDemographicEnum": { + "title": "EntityDemographicEnum", + "description": "An enumeration.", + "enum": [ + "age", + "race", + "sex" + ], + "type": "string" + }, + "EntityHistoryEnum": { + "title": "EntityHistoryEnum", + "description": "An enumeration.", + "enum": [ + "empire" + ], + "type": "string" + }, + "EntityOtherEnum": { + "title": "EntityOtherEnum", + "description": "An enumeration.", + "enum": [ + "other" + ], + "type": "string" + }, + "EntityImageEnum": { + "title": "EntityImageEnum", + "description": "An enumeration.", + "enum": [ + "pixel", + "polygon" + ], + "type": "string" + }, + "ObservationLevel": { + "title": "ObservationLevel", + "type": "object", + "properties": { + "country": { + "title": "País", + "description": "País da entidade. 
Deixar nulo se entidade for internacional ou não-espacial.\nOpções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "allOf": [ + { + "$ref": "#/definitions/CountryEnum" + } + ] + }, + "entity": { + "title": "Entidade", + "description": "Opções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "anyOf": [ + { + "$ref": "#/definitions/EntityDateTimeEnum" + }, + { + "$ref": "#/definitions/EntitySpatialEnum" + }, + { + "$ref": "#/definitions/EntityIndividualEnum" + }, + { + "$ref": "#/definitions/EntityEstablishmentEnum" + }, + { + "$ref": "#/definitions/EntityPoliticsEnum" + }, + { + "$ref": "#/definitions/EntityScienceEnum" + }, + { + "$ref": "#/definitions/EntityEconomicsEnum" + }, + { + "$ref": "#/definitions/EntityEducationEnum" + }, + { + "$ref": "#/definitions/EntityEventEnum" + }, + { + "$ref": "#/definitions/EntityArtEnum" + }, + { + "$ref": "#/definitions/EntityInfrastructureEnum" + }, + { + "$ref": "#/definitions/EntityTransportationEnum" + }, + { + "$ref": "#/definitions/EntitySecurityEnum" + }, + { + "$ref": "#/definitions/EntityDemographicEnum" + }, + { + "$ref": "#/definitions/EntityHistoryEnum" + }, + { + "$ref": "#/definitions/EntityOtherEnum" + }, + { + "$ref": "#/definitions/EntityImageEnum" + } + ] + }, + "columns": { + "title": "Colunas identificadoras", + "description": "Colunas identificadoras da entidade", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "ExternalLink": { + "title": "ExternalLink", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "name": { + "title": "Nome", + "type": "string" + }, + "description": { + "title": "Descrição", + "description": "Descreva a fonte externa. Essas são as primeiras frases que um usuário vai ver.\nVocê não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de\ncomo usar os dados.\nSe souber, liste também aplicações: pesquisa, apps, etc. 
que usem os dados.", + "yaml_order": { + "id_before": "title", + "id_after": "language" + }, + "type": "string" + }, + "position": { + "title": "Posição", + "type": "integer" + }, + "url": { + "title": "Url", + "description": "Url da fonte original.", + "yaml_order": { + "id_before": null, + "id_after": "title" + }, + "type": "string" + }, + "cache_last_updated": { + "title": "Última Atualização do Cache", + "type": "string", + "format": "date-time" + }, + "cache_url": { + "title": "Url Cache", + "type": "string" + }, + "created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "datastore_active": { + "title": "Datastore Ativa", + "type": "boolean" + }, + "format": { + "title": "Formato", + "type": "string" + }, + "hash": { + "title": "Hash", + "type": "string" + }, + "last_modified": { + "title": "Data da Última Atualização", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data de Modificação", + "type": "string", + "format": "date-time" + }, + "mimetype": { + "title": "Mimetype", + "type": "string" + }, + "mimetype_inner": { + "title": "Mimetype Inner", + "type": "string" + }, + "package_id": { + "title": "Package Id", + "type": "string" + }, + "size": { + "title": "Size", + "type": "number" + }, + "state": { + "title": "Estado", + "type": "string" + }, + "url_type": { + "title": "Url Type", + "type": "string" + }, + "resource_type": { + "title": "Resource Type", + "enum": [ + "external_link" + ], + "type": "string" + }, + "language": { + "title": "Língua", + "description": "Em quais línguas a fonte externa está disponível.\nOpções em 'language' em https://basedosdados.org/api/3/action/bd_available_options.", + "yaml_order": { + "id_before": "description", + "id_after": "has_structure_data" + }, + "type": "array", + "items": { + "$ref": "#/definitions/LanguageEnum" + }, + "uniqueItems": true + }, + "has_structured_data": { + "title": "Tem Dados Estruturados", + "description": "A fonte externa
disponibiliza dados em formatos estruturados, como csv, json, etc?\nOpções: yes, no.", + "yaml_order": { + "id_before": "language", + "id_after": "has_api" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "has_api": { + "title": "Tem uma API", + "description": "A fonte externa disponibiliza uma API para acesso aos dados?\nOpções: yes, no.", + "yaml_order": { + "id_before": "has_structured_data", + "id_after": "is_free" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "is_free": { + "title": "É de Graça", + "description": "O acesso aos dados da fonte externa é grátis?\nOpções: yes, no.", + "yaml_order": { + "id_before": "has_api", + "id_after": "requires_registration" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "requires_registration": { + "title": "Requer Registro", + "description": "A fonte externa requer registro de usuário para acesso aos dados?\nOpções: yes, no.", + "yaml_order": { + "id_before": "is_free", + "id_after": "availability" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "availability": { + "title": "Disponibilidade", + "description": "Como os dados são disponibilizados?\nOpções 'availability' em https://basedosdados.org/api/3/action/bd_available_options.", + "default": "online", + "yaml_order": { + "id_before": "requires_registration", + "id_after": "country_ip_address_required" + }, + "allOf": [ + { + "$ref": "#/definitions/AvailabilityEnum" + } + ] + }, + "country_ip_address_required": { + "title": "Requer IP de Algum País", + "description": "Países nos quais o acesso à fonte externa é liberado.\nOpções em 'country' em https://basedosdados.org/api/3/action/bd_available_options.", + "yaml_order": { + "id_before": "availability", + "id_after": "license" + }, + "type": "array", + "items": { + "$ref": "#/definitions/CountryEnum" + }, + "uniqueItems": true + }, + "license": { + "title": "Tipo de Licença", + "description": "Qual tipo de licença regula 
acesso aos dados da fonte externa?", + "yaml_order": { + "id_before": "country_ip_address_required", + "id_after": "spatial_coverage" + }, + "allOf": [ + { + "$ref": "#/definitions/LicenseEnum" + } + ] + }, + "spatial_coverage": { + "title": "Cobertura Espacial", + "description": "As máximas unidades espaciais que a tabela cobre.\nExemplo:\n - sa.br", + "yaml_order": { + "id_before": "license", + "id_after": "temporal_coverage" + }, + "minItems": 1, + "type": "array", + "items": { + "$ref": "#/definitions/SpatialCoverageArea" + } + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nPreencher como lista de intervalos.\nExemplos: 1995(1)2018 ou (1)2020.", + "yaml_order": { + "id_before": "spatial_coverage", + "id_after": "update_frequency" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "update_frequency": { + "title": "Frequência de Atualização", + "description": "A unidade temporal pela qual a tabela é atualizada.\nOpções em 'time_unit' em https://basedosdados.org/api/3/action/bd_available_options.", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "observation_level" + }, + "allOf": [ + { + "$ref": "#/definitions/TimeUnitEnum" + } + ] + }, + "observation_level": { + "title": "Nível da observação", + "description": "Nível de observação dos dados: o que representa cada linha.", + "yaml_order": { + "id_before": "update_frequency", + "id_after": null + }, + "type": "array", + "items": { + "$ref": "#/definitions/ObservationLevel" + } + } + }, + "required": [ + "name", + "position", + "resource_type" + ] + }, + "LastUpdated": { + "title": "LastUpdated", + "type": "object", + "properties": { + "metadata": { + "title": "Metadados", + "type": "string" + }, + "data": { + "title": "Dados", + "type": "string" + }, + "release": { + "title": "Dados Originais", + "type": "string" + } + } + }, + "PublishedBy": { + "title": "PublishedBy", + "type": "object", + 
"properties": { + "name": { + "title": "Nome", + "type": "string" + }, + "email": { + "title": "Email", + "type": "string" + }, + "github_user": { + "title": "Usuário Github", + "type": "string" + }, + "ckan_user": { + "title": "Usuário CKAN", + "type": "string" + }, + "website": { + "title": "Website", + "type": "string" + } + } + }, + "DataCleanedBy": { + "title": "DataCleanedBy", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "type": "string" + }, + "email": { + "title": "Email", + "type": "string" + }, + "github_user": { + "title": "Usuário Github", + "type": "string" + }, + "ckan_user": { + "title": "Usuário CKAN", + "type": "string" + }, + "website": { + "title": "Website", + "type": "string" + } + } + }, + "ckanext__basedosdados__validator__resources__bdm__PartnerOrganization": { + "title": "PartnerOrganization", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome completo", + "type": "string" + }, + "organization_id": { + "title": "ID Organização", + "description": "ID Organização - CKAN", + "type": "string" + } + } + }, + "BigQueryTypeEnum": { + "title": "BigQueryTypeEnum", + "description": "An enumeration.", + "enum": [ + "array", + "boolean", + "date", + "datetime", + "float64", + "geography", + "int64", + "numeric", + "string", + "struct", + "time", + "timestamp" + ], + "type": "string" + }, + "DirectoryEnum": { + "title": "DirectoryEnum", + "description": "An enumeration.", + "enum": [ + "br_bd_diretorios_africa_sul", + "br_bd_diretorios_alemanha", + "br_bd_diretorios_argentina", + "br_bd_diretorios_australia", + "br_bd_diretorios_bolivia", + "br_bd_diretorios_brasil", + "br_bd_diretorios_canada", + "br_bd_diretorios_chile", + "br_bd_diretorios_china", + "br_bd_diretorios_colombia", + "br_bd_diretorios_data_tempo", + "br_bd_diretorios_dinamarca", + "br_bd_diretorios_espanha", + "br_bd_diretorios_franca", + "br_bd_diretorios_india", + "br_bd_diretorios_inglaterra", + "br_bd_diretorios_eua", 
+ "br_bd_diretorios_mexico", + "br_bd_diretorios_mundo", + "br_bd_diretorios_noruega", + "br_bd_diretorios_peru", + "br_bd_diretorios_portugal", + "br_bd_diretorios_suecia", + "br_bd_diretorios_suica" + ], + "type": "string" + }, + "DirectoryColumn": { + "title": "DirectoryColumn", + "type": "object", + "properties": { + "dataset_id": { + "title": "ID Conjunto", + "allOf": [ + { + "$ref": "#/definitions/DirectoryEnum" + } + ] + }, + "table_id": { + "title": "ID Tabela", + "type": "string" + }, + "column_name": { + "title": "Nome Coluna", + "type": "string" + } + } + }, + "BdmColumns": { + "title": "BdmColumns", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome em produção", + "yaml_order": { + "id_before": null, + "id_after": "bigquery_type" + }, + "type": "string" + }, + "bigquery_type": { + "title": "Tipo no BigQuery", + "description": "Tipo no BigQuery.\nOpções: string, int64, float64, date, time, geography.\nVer https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types.", + "yaml_order": { + "id_before": "name", + "id_after": "description" + }, + "allOf": [ + { + "$ref": "#/definitions/BigQueryTypeEnum" + } + ] + }, + "description": { + "title": "Descrição", + "description": "Descrição", + "yaml_order": { + "id_before": "bigquery_type", + "id_after": "temporal_coverage" + }, + "type": "string" + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nPreenchido como lista de intervalos sem repetir os metadados da tabela.\nExemplo: 2001(1)2010, ou (1)2020, ou (1).", + "yaml_order": { + "id_before": "description", + "id_after": "covered_by_dictionary" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "covered_by_dictionary": { + "title": "Coberta por um Dicionário", + "description": "A coluna precisa de dicionário?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after":
"directory_column" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "directory_column": { + "title": "Coluna Correspondente nos Diretórios", + "description": "Chave primária nos diretórios correspondente à coluna.", + "yaml_order": { + "id_before": "covered_by_dictionary", + "id_after": "measurement_unit" + }, + "allOf": [ + { + "$ref": "#/definitions/DirectoryColumn" + } + ] + }, + "measurement_unit": { + "title": "Unidade de Medida", + "description": "Qual é a unidade de medida da coluna?\nEscreva a fórmula matemática baseada nas chaves de unidades básicas permitidas em https://basedosdados.org/api/3/action/bd_available_options na seção Measurement Unit.\nExemplos: 'kilometer^2', 'meter^3 / second', '1000 * person', 'gigawatt'.", + "yaml_order": { + "id_before": "directory_column", + "id_after": "has_sensitive_data" + }, + "type": "string" + }, + "has_sensitive_data": { + "title": "Contém Dados Sensíveis (LGPD)", + "description": "A coluna contém dados sensíveis, como definido pela Lei Geral de Proteção de Dados (LGPD)?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "measurement_unit", + "id_after": "observations" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "observations": { + "title": "Observações", + "description": "Informações sobre a coluna: arquitetura, decisões de limpeza, etc.", + "yaml_order": { + "id_before": "has_sensitive_data", + "id_after": "is_in_staging" + }, + "type": "string" + }, + "is_in_staging": { + "title": "Está em Staging", + "description": "A coluna está na tabela staging?\nOpções: True, False", + "default": true, + "yaml_order": { + "id_before": "observations", + "id_after": "is_partition" + }, + "type": "boolean" + }, + "is_partition": { + "title": "É Partição", + "description": "A coluna é uma partição?\nOpções: True, False", + "default": false, + "yaml_order": { + "id_before": "is_in_staging", + "id_after": null + }, + "type": "boolean" + } + }, + 
"required": [ + "name" + ] + }, + "BdmTable": { + "title": "BdmTable", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "name": { + "title": "Nome", + "type": "string" + }, + "description": { + "title": "Descrição", + "description": "Descreva a tabela. Essas são as primeiras frases que um usuário vai ver.\nVocê não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de\ncomo usar os dados.\nSe souber, liste também aplicações: pesquisa, apps, etc. que usem os dados.", + "yaml_order": { + "id_before": "title", + "id_after": "spatial_coverage" + }, + "type": "string" + }, + "position": { + "title": "Posição", + "type": "integer" + }, + "url": { + "title": "Url", + "type": "string" + }, + "cache_last_updated": { + "title": "Última Atualização do Cache", + "type": "string", + "format": "date-time" + }, + "cache_url": { + "title": "Url Cache", + "type": "string" + }, + "created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "datastore_active": { + "title": "Datastore Ativa", + "type": "boolean" + }, + "format": { + "title": "Formato", + "type": "string" + }, + "hash": { + "title": "Hash", + "type": "string" + }, + "last_modified": { + "title": "Data da Última Atualização", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data da Última Modificação dos Metadados", + "yaml_order": { + "id_before": "number_rows", + "id_after": null + }, + "type": "string", + "format": "date-time" + }, + "mimetype": { + "title": "Mimetype", + "type": "string" + }, + "mimetype_inner": { + "title": "Mimetype Inner", + "type": "string" + }, + "package_id": { + "title": "Package Id", + "type": "string" + }, + "size": { + "title": "Size", + "type": "number" + }, + "state": { + "title": "Estado", + "type": "string" + }, + "url_type": { + "title": "Url Type", + "type": "string" + }, + "resource_type": { + "title": "Resource Type", + "enum": [ + "bdm_table" + ], + "type":
"string" + }, + "dataset_id": { + "title": "ID Conjunto", + "description": "Igual ao dataset.name mas como lower case.\nExemplos: br_ibge_populacao, br_inep_censo_escolar", + "yaml_order": { + "id_before": null, + "id_after": "table_id" + }, + "type": "string" + }, + "table_id": { + "title": "ID Tabela", + "yaml_order": { + "id_before": "dataset_id", + "id_after": "title" + }, + "type": "string" + }, + "spatial_coverage": { + "title": "Cobertura Espacial", + "description": "As máximas unidades espaciais que a tabela cobre.\nExemplo:\n - sa.br\n \n - sa.br.sp\n \n - world", + "yaml_order": { + "id_before": "description", + "id_after": "temporal_coverage" + }, + "minItems": 1, + "type": "array", + "items": { + "$ref": "#/definitions/SpatialCoverageArea" + } + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nExemplos:\n - 1995(1)2019\nCaso a cobertura não seja contínua:\n - 2002(2)2010\n - 2016\n - 2020", + "yaml_order": { + "id_before": "spatial_coverage", + "id_after": "update_frequency" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "update_frequency": { + "title": "Frequência de Atualização", + "description": "A unidade temporal com qual a tabela é atualizada.\nOpções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "observation_level" + }, + "allOf": [ + { + "$ref": "#/definitions/TimeUnitEnum" + } + ] + }, + "observation_level": { + "title": "Nível da observação", + "description": "Nível de observação da tabela: o que representa cada linha.\nA combinação das colunas aqui deve construir uma chave única da tabela.\nOpções de entity em 'https://basedosdados.org/api/3/action/bd_available_options'\nCaso a entidade seja espacial incluir a informação de 'country'.\nExemplos:\n - entity: year\n columns:\n - ano\n - country: br\n entity: state\n columns:\n - sigla_uf", + "yaml_order": { +
"id_before": "update_frequency", + "id_after": "last_updated" + }, + "type": "array", + "items": { + "$ref": "#/definitions/ObservationLevel" + } + }, + "last_updated": { + "title": "Data da Última Atualização", + "yaml_order": { + "id_before": "observation_level", + "id_after": "version" + }, + "allOf": [ + { + "$ref": "#/definitions/LastUpdated" + } + ] + }, + "version": { + "title": "Versão", + "description": "Versão da tabela. Seguindo o padrão de semantic versioning.\nExemplos: v1.0, v1.1.3", + "yaml_order": { + "id_before": "last_updated", + "id_after": "published_by" + }, + "type": "string" + }, + "published_by": { + "title": "Publicado por", + "description": "Quem está preenchendo esses metadados?", + "yaml_order": { + "id_before": "version", + "id_after": "data_cleaned_by" + }, + "allOf": [ + { + "$ref": "#/definitions/PublishedBy" + } + ] + }, + "data_cleaned_by": { + "title": "Dados Limpos por", + "description": "Qual organização/departamento/pessoa tratou os dados?\nAs vezes há um ponto intermediário entre os dados originais e subir na Base dos Dados.\nSe essa pessoa é você, preencha abaixo com suas informações.", + "yaml_order": { + "id_before": "published_by", + "id_after": "data_cleaning_description" + }, + "allOf": [ + { + "$ref": "#/definitions/DataCleanedBy" + } + ] + }, + "data_cleaning_description": { + "title": "Descrição da Limpeza de Dados", + "description": "Se houve passos de tratamento, limpeza e manipulação de dados, descreva-os aqui.", + "yaml_order": { + "id_before": "data_cleaned_by", + "id_after": "data_cleaning_code_url" + }, + "type": "string" + }, + "data_cleaning_code_url": { + "title": "Url do Código de Limpeza dos Dados", + "description": "Url do código de limpeza dos dados do github.", + "yaml_order": { + "id_before": "data_cleaning_description", + "id_after": "partner_organization" + }, + "type": "string" + }, + "partner_organization": { + "title": "Organização parceira", + "description": "Organização que ajudou 
institucionalmente na disponibilização dos dados.", + "yaml_order": { + "id_before": "data_cleaning_code_url", + "id_after": "raw_files_url" + }, + "allOf": [ + { + "$ref": "#/definitions/ckanext__basedosdados__validator__resources__bdm__PartnerOrganization" + } + ] + }, + "raw_files_url": { + "title": "Url dos Dados Originais", + "description": "Url dos dados originais no GCP Storage.", + "yaml_order": { + "id_before": "partner_organization", + "id_after": "auxiliary_files_url" + }, + "type": "string" + }, + "auxiliary_files_url": { + "title": "Url dos Arquivos Auxiliares", + "description": "Url dos arquivos auxiliares no GCP Storage.", + "yaml_order": { + "id_before": "raw_files_url", + "id_after": "architecture_url" + }, + "type": "string" + }, + "architecture_url": { + "title": "Url da Tabela de Arquitetura", + "description": "Url da tabela de arquitetura no GCP Storage.", + "yaml_order": { + "id_before": "auxiliary_files_url", + "id_after": "source_bucket_name" + }, + "type": "string" + }, + "source_bucket_name": { + "title": "Nome do Bucket Fonte no GCP", + "yaml_order": { + "id_before": "architecture_url", + "id_after": "project_id_prod" + }, + "type": "string" + }, + "project_id_prod": { + "title": "ID do Projeto de Produção no GCP", + "yaml_order": { + "id_before": "source_bucket_name", + "id_after": "project_id_staging" + }, + "type": "string" + }, + "project_id_staging": { + "title": "ID do Projeto de Staging no GCP", + "yaml_order": { + "id_before": "project_id_prod", + "id_after": "partitions" + }, + "type": "string" + }, + "partitions": { + "title": "Partições", + "description": "Liste as colunas da tabela que representam partições.\nNão esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery.\nIsso poupará muito tempo e dinheiro às pessoas utilizando essa tabela.\nSe não houver partições, não modifique abaixo.", + "yaml_order": { + "id_before": "project_id_staging", + "id_after": "columns" + }, + "type": "array", + "items": 
{ + "type": "string" + } + }, + "uncompressed_file_size": { + "title": "Tamanho do Arquivo Não-Comprimido (em bytes)", + "type": "integer" + }, + "compressed_file_size": { + "title": "Tamanho do Arquivo Comprimido (em bytes)", + "type": "integer" + }, + "columns": { + "title": "Colunas", + "description": "Quais são as colunas? Certifique-se de escrever uma boa descrição, as pessoas vão gostar\npara saber sobre o que é a coluna.\nAdicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando\ninicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que\npreencherá automaticamente as colunas.\nAlém disso, você deve adicionar as colunas de partição aqui e definir is_partition como True.", + "yaml_order": { + "id_before": "partitions", + "id_after": "number_rows" + }, + "type": "array", + "items": { + "$ref": "#/definitions/BdmColumns" + } + }, + "title": { + "title": "Título", + "description": "Título da tabela.", + "yaml_order": { + "id_before": "table_id", + "id_after": "description" + }, + "type": "string" + }, + "number_rows": { + "title": "Número de Linhas da Tabela", + "yaml_order": { + "id_before": "columns", + "id_after": "metadata_modified" + }, + "type": "integer" + } + }, + "required": [ + "name", + "position", + "resource_type", + "dataset_id", + "table_id" + ] + }, + "RequestedBy": { + "title": "RequestedBy", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "type": "string" + }, + "email": { + "title": "Email", + "type": "string" + }, + "github_user": { + "title": "Usuário Github", + "type": "string" + }, + "ckan_user": { + "title": "Usuário CKAN", + "type": "string" + }, + "website": { + "title": "Website", + "type": "string" + } + } + }, + "StatusEnum": { + "title": "StatusEnum", + "description": "An enumeration.", + "enum": [ + "processing", + "answered", + "denied" + ], + "type": "string" + }, + 
"ckanext__basedosdados__validator__resources__information_request__fields_definitions__PartnerOrganization": { + "title": "PartnerOrganization", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome completo", + "type": "string" + }, + "organization_id": { + "title": "ID Organização", + "description": "ID Organização - CKAN", + "type": "string" + } + } + }, + "InformationRequest": { + "title": "InformationRequest", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "name": { + "title": "Nome", + "type": "string" + }, + "description": { + "title": "Descrição", + "description": "Descreva o pedido.\nVocê não precisa ser muito conciso. Sinta-se a vontade para explicar a origem do pedido e outras informações relevantes.", + "yaml_order": { + "id_before": "department", + "id_after": "opening_date" + }, + "type": "string" + }, + "position": { + "title": "Posição", + "type": "integer" + }, + "url": { + "title": "Url", + "description": "Url onde está disponível o pedido.", + "yaml_order": { + "id_before": "number", + "id_after": "department" + }, + "type": "string" + }, + "cache_last_updated": { + "title": "Última Atualização do Cache", + "type": "string", + "format": "date-time" + }, + "cache_url": { + "title": "Url Cache", + "type": "string" + }, + "created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "datastore_active": { + "title": "Datastore Ativa", + "type": "boolean" + }, + "format": { + "title": "Formato", + "type": "string" + }, + "hash": { + "title": "Hash", + "type": "string" + }, + "last_modified": { + "title": "Data da Útima Atualização", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data de Modificação", + "type": "string", + "format": "date-time" + }, + "mimetype": { + "title": "Mimetype", + "type": "string" + }, + "mimetype_inner": { + "title": "Mimetype Inner", + "type": "string" + }, + "package_id": { 
+ "title": "Package Id", + "type": "string" + }, + "size": { + "title": "Size", + "type": "number" + }, + "state": { + "title": "Estado", + "type": "string" + }, + "url_type": { + "title": "Url Type", + "type": "string" + }, + "resource_type": { + "title": "Resource Type", + "enum": [ + "information_request" + ], + "type": "string" + }, + "origin": { + "title": "Origem", + "description": "Origem do pedido\nExemplos: FalaBr, Senado, SIC-SP, etc.", + "yaml_order": { + "id_before": null, + "id_after": "number" + }, + "type": "string" + }, + "number": { + "title": "Número", + "description": "Número de pedido.", + "yaml_order": { + "id_before": "origin", + "id_after": "url" + }, + "type": "string" + }, + "department": { + "title": "Departamento", + "description": "Departamento/Órgão vinculado", + "yaml_order": { + "id_before": "url", + "id_after": "description" + }, + "type": "string" + }, + "opening_date": { + "title": "Data de Abertura", + "description": "Formato YYYY-MM-DD", + "yaml_order": { + "id_before": "description", + "id_after": "requested_by" + }, + "type": "string" + }, + "requested_by": { + "title": "Quem Fez o Pedido", + "yaml_order": { + "id_before": "opening_date", + "id_after": "spatial_coverage" + }, + "allOf": [ + { + "$ref": "#/definitions/RequestedBy" + } + ] + }, + "spatial_coverage": { + "title": "Cobertura Espacial", + "description": "A máxima unidade espacial que os dados pedidos cobrem.", + "yaml_order": { + "id_before": "requested_by", + "id_after": "temporal_coverage" + }, + "minItems": 1, + "type": "array", + "items": { + "$ref": "#/definitions/SpatialCoverageArea" + } + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pelos dados pedidos.Preencher como lista de intervalos.\nExemplos: 1995(1)2018 ou (1)2020.", + "yaml_order": { + "id_before": "spatial_coverage", + "id_after": "update_frequency" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "update_frequency": { 
+ "title": "Frequência de Atualização", + "description": "A unidade temporal na qual os dados pedidos são atualizados.\nOpções em 'time_unit' em https://basedosdados.org/api/3/action/bd_available_options.", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "observation_level" + }, + "allOf": [ + { + "$ref": "#/definitions/TimeUnitEnum" + } + ] + }, + "observation_level": { + "title": "Nível da observação", + "description": "Nível de observação dos dados: o que representa cada linha.", + "yaml_order": { + "id_before": "update_frequency", + "id_after": "status" + }, + "type": "array", + "items": { + "$ref": "#/definitions/ObservationLevel" + } + }, + "status": { + "title": "Status", + "description": "Estado do pedido.Opções em 'status' em https://basedosdados.org/api/3/action/bd_available_options.", + "yaml_order": { + "id_before": "observation_level", + "id_after": "data_url" + }, + "allOf": [ + { + "$ref": "#/definitions/StatusEnum" + } + ] + }, + "data_url": { + "title": "Url dos Dados", + "description": "Onde estão os dados da resposta?\nExemplo: www.exemplo.com/dados.csv", + "yaml_order": { + "id_before": "status", + "id_after": "observations" + }, + "type": "string" + }, + "observations": { + "title": "Observações", + "yaml_order": { + "id_before": "data_url", + "id_after": "partner_organization" + }, + "type": "string" + }, + "partner_organization": { + "title": "Organização parceira", + "description": "Organização que ajudou institucionalmente na criação ou disponibilização do pedido de informação.", + "yaml_order": { + "id_before": "observations", + "id_after": null + }, + "allOf": [ + { + "$ref": "#/definitions/ckanext__basedosdados__validator__resources__information_request__fields_definitions__PartnerOrganization" + } + ] + } + }, + "required": [ + "name", + "position", + "resource_type" + ] + }, + "BdmDictionary": { + "title": "BdmDictionary", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + 
"name": { + "title": "Nome", + "type": "string" + }, + "description": { + "title": "Descrição", + "type": "string" + }, + "position": { + "title": "Posição", + "type": "integer" + }, + "url": { + "title": "Url", + "type": "string" + }, + "cache_last_updated": { + "title": "Última Atualização do Cache", + "type": "string", + "format": "date-time" + }, + "cache_url": { + "title": "Url Cache", + "type": "string" + }, + "created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "datastore_active": { + "title": "Datastore Ativa", + "type": "boolean" + }, + "format": { + "title": "Formato", + "type": "string" + }, + "hash": { + "title": "Hash", + "type": "string" + }, + "last_modified": { + "title": "Data da Útima Atualização", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data de Modificação", + "type": "string", + "format": "date-time" + }, + "mimetype": { + "title": "Mimetype", + "type": "string" + }, + "mimetype_inner": { + "title": "Mimetype Inner", + "type": "string" + }, + "package_id": { + "title": "Package Id", + "type": "string" + }, + "size": { + "title": "Size", + "type": "number" + }, + "state": { + "title": "Estado", + "type": "string" + }, + "url_type": { + "title": "Url Type", + "type": "string" + }, + "resource_type": { + "title": "Resource Type", + "enum": [ + "bdm_dictionary" + ], + "type": "string" + }, + "dataset_id": { + "title": "ID Conjunto", + "description": "Campo dataset_id padrão.", + "yaml_order": { + "id_after": null, + "id_before": "table_id" + }, + "type": "string" + }, + "table_id": { + "title": "ID Tabela", + "description": "Campo table_id padrão.", + "yaml_order": { + "id_after": "dataset_id", + "id_before": "identifying_columns" + }, + "type": "string" + }, + "identifying_columns": { + "title": "Colunas Identificadoras", + "description": "O conjunto mínimo de colunas identificando cada linha unicamente.\nPreencha com os nomes de colunas.\nExemplos: id_municipio, 
ano.\nPode ser vazio pois certas tabelas não possuem identificadores.", + "yaml_order": { + "id_after": "table_id", + "id_before": "last_updated" + }, + "type": "array", + "items": { + "type": "string" + } + }, + "last_updated": { + "title": "Data da Última Atualização", + "yaml_order": { + "id_after": "identifying_columns", + "id_before": "published_by" + }, + "allOf": [ + { + "$ref": "#/definitions/LastUpdated" + } + ] + }, + "published_by": { + "title": "Publicado por", + "description": "Quem está preenchendo esses metadados?", + "yaml_order": { + "id_after": "last_updated", + "id_before": "source_bucket_name" + }, + "allOf": [ + { + "$ref": "#/definitions/PublishedBy" + } + ] + }, + "source_bucket_name": { + "title": "Nome do Bucket Fonte no GCP", + "yaml_order": { + "id_after": "published_by", + "id_before": "project_id_prod" + }, + "type": "string" + }, + "project_id_prod": { + "title": "ID do Projeto de Produção no GCP", + "yaml_order": { + "id_after": "source_bucket_name", + "id_before": "project_id_staging" + }, + "type": "string" + }, + "project_id_staging": { + "title": "ID do Projeto de Staging no GCP", + "yaml_order": { + "id_after": "project_id_prod", + "id_before": "ckan_url" + }, + "type": "string" + }, + "partitions": { + "title": "Partições", + "description": "Liste as colunas da tabela que representam partições.\nNão esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery.\nIsso poupará muito tempo e dinheiro às pessoas utilizando essa tabela.\nSe não houver partições, não modifique abaixo.", + "yaml_order": { + "id_after": "github_url", + "id_before": "bdm_file_size" + }, + "type": "string" + }, + "bdm_file_size": { + "title": "Tamanho do Arquivo", + "yaml_order": { + "id_after": "partitions", + "id_before": "columns" + }, + "anyOf": [ + { + "type": "integer" + }, + { + "enum": [ + "Unavailable", + "" + ], + "type": "string" + } + ] + }, + "columns": { + "title": "Colunas", + "description": "Quais são as colunas? 
Certifique-se de escrever uma boa descrição, as pessoas vão gostar\npara saber sobre o que é a coluna.\nAdicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando\ninicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que\npreencherá automaticamente as colunas.\nAlgumas colunas existirão apenas na tabela final, você as construirá em `publish.sql`.\nPara esses, defina is_in_staging como False.\nAlém disso, você deve adicionar as colunas de partição aqui e definir is_partition como True.", + "yaml_order": { + "id_after": "bdm_file_size", + "id_before": null + }, + "type": "array", + "items": { + "$ref": "#/definitions/BdmColumns" + } + } + }, + "required": [ + "name", + "description", + "position", + "resource_type", + "dataset_id", + "table_id" + ] + } + } + } +} \ No newline at end of file diff --git a/python-package/basedosdados/schemas/table_schema.json b/python-package/basedosdados/schemas/table_schema.json new file mode 100644 index 000000000..b4342025d --- /dev/null +++ b/python-package/basedosdados/schemas/table_schema.json @@ -0,0 +1,1071 @@ +{ + "help": "https://basedosdados.org/api/3/action/help_show?name=bd_bdm_table_schema", + "success": true, + "result": { + "title": "BdmTable", + "type": "object", + "properties": { + "id": { + "title": "Id", + "type": "string" + }, + "description": { + "title": "Descrição", + "description": "Descreva a tabela. Essas são as primeiras frases que um usuário vai ver.\nVocê não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de\ncomo usar os dados.\nSe souber, liste também aplicações: pesquisa, apps, etc. 
que usem os dados.", + "yaml_order": { + "id_before": "name", + "id_after": "spatial_coverage" + }, + "type": "string" + }, + "position": { + "title": "Posição", + "type": "integer" + }, + "url": { + "title": "Url", + "type": "string" + }, + "cache_last_updated": { + "title": "Última Atualização do Cache", + "type": "string", + "format": "date-time" + }, + "cache_url": { + "title": "Url Cache", + "type": "string" + }, + "created": { + "title": "Data de Criação", + "type": "string", + "format": "date-time" + }, + "datastore_active": { + "title": "Datastore Ativa", + "type": "boolean" + }, + "format": { + "title": "Formato", + "type": "string" + }, + "hash": { + "title": "Hash", + "type": "string" + }, + "last_modified": { + "title": "Data da Última Atualização", + "type": "string", + "format": "date-time" + }, + "metadata_modified": { + "title": "Data da Última Modificação dos Metadados", + "yaml_order": { + "id_before": "number_rows", + "id_after": null + }, + "type": "string", + "format": "date-time" + }, + "mimetype": { + "title": "Mimetype", + "type": "string" + }, + "mimetype_inner": { + "title": "Mimetype Inner", + "type": "string" + }, + "package_id": { + "title": "Package Id", + "type": "string" + }, + "size": { + "title": "Size", + "type": "number" + }, + "state": { + "title": "Estado", + "type": "string" + }, + "url_type": { + "title": "Url Type", + "type": "string" + }, + "resource_type": { + "title": "Resource Type", + "enum": [ + "bdm_table" + ], + "type": "string" + }, + "dataset_id": { + "title": "ID Conjunto", + "description": "UUID do conjunto de dados ao qual a tabela pertence (preenchido automaticamente).", + "yaml_order": { + "id_before": null, + "id_after": "dataset_slug" + }, + "type": "string" + }, + "dataset_slug": { + "title": "Dataset Slug", + "description": "Slug do conjunto de dados ao qual a tabela pertence.", + "type": "string", + "yaml_order": { + "id_before": "dataset_id", + "id_after": "table_id" + } + }, + "table_id": { + "title":
"ID Tabela", + "description": "UUID da tabela (será preenchido automaticamente)", + "yaml_order": { + "id_before": "dataset_slug", + "id_after": "table_slug" + }, + "type": "string" + }, + "table_slug": { + "title": "Table Slug", + "type": "string", + "description": "Slug da tabela", + "yaml_order": { + "id_before": "table_id", + "id_after": "name" + } + }, + "spatial_coverage": { + "title": "Cobertura Espacial", + "description": "As máximas unidades espaciais que a tabela cobre.\nExemplo:\n - sa.br\n \n - sa.br.sp\n \n - world", + "yaml_order": { + "id_before": "description", + "id_after": "temporal_coverage" + }, + "minItems": 1, + "type": "array", + "items": { + "$ref": "#/definitions/SpatialCoverageArea" + } + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nExemplos:\n - 1995(1)2019\nCaso a cobertura não seja contínua:\n - 2002(2)2010\n - 2016\n - 2020", + "yaml_order": { + "id_before": "spatial_coverage", + "id_after": "update_frequency" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + "update_frequency": { + "title": "Frequência de Atualização", + "description": "A unidade temporal com qual a tabela é atualizada.\nOpções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "observation_level" + }, + "allOf": [ + { + "$ref": "#/definitions/TimeUnitEnum" + } + ] + }, + "observation_level": { + "title": "Nível da observação", + "description": "Nível de observação da tabela: o que representa cada linha.\nA combinação das colunas aqui deve construir uma chave única da tabelaOpções de entity em 'https://basedosdados.org/api/3/action/bd_available_options'\nCaso a entidade seja espacial incluir a informação de 'country' Exemplos:\n - entity: year\n columns:\n - ano\n - country: br\n entity: state\n columns:\n - sigla_uf", + "yaml_order": { + "id_before": "update_frequency", + "id_after": 
"last_updated" + }, + "type": "array", + "items": { + "$ref": "#/definitions/ObservationLevel" + } + }, + "last_updated": { + "title": "Data da Última Atualização", + "yaml_order": { + "id_before": "observation_level", + "id_after": "version" + }, + "allOf": [ + { + "$ref": "#/definitions/LastUpdated" + } + ] + }, + "version": { + "title": "Versão", + "description": "Versão da tabela. Seguindo o padrão de semantic versioning.\nExemplos: v1.0, v1.1.3", + "yaml_order": { + "id_before": "last_updated", + "id_after": "published_by" + }, + "type": "string" + }, + "published_by": { + "title": "Publicado por", + "description": "Quem está preenchendo esses metadados?", + "yaml_order": { + "id_before": "version", + "id_after": "data_cleaned_by" + }, + "allOf": [ + { + "$ref": "#/definitions/PublishedBy" + } + ] + }, + "data_cleaned_by": { + "title": "Dados Limpos por", + "description": "Qual organização/departamento/pessoa tratou os dados?\nAs vezes há um ponto intermediário entre os dados originais e subir na Base dos Dados.\nSe essa pessoa é você, preencha abaixo com suas informações.", + "yaml_order": { + "id_before": "published_by", + "id_after": "data_cleaning_description" + }, + "allOf": [ + { + "$ref": "#/definitions/DataCleanedBy" + } + ] + }, + "data_cleaning_description": { + "title": "Descrição da Limpeza de Dados", + "description": "Se houve passos de tratamento, limpeza e manipulação de dados, descreva-os aqui.", + "yaml_order": { + "id_before": "data_cleaned_by", + "id_after": "data_cleaning_code_url" + }, + "type": "string" + }, + "data_cleaning_code_url": { + "title": "Url do Código de Limpeza dos Dados", + "description": "Url do código de limpeza dos dados do github.", + "yaml_order": { + "id_before": "data_cleaning_description", + "id_after": "partner_organization" + }, + "type": "string" + }, + "partner_organization": { + "title": "Organização parceira", + "description": "Organização que ajudou institucionalmente na disponibilização dos dados.", + 
"yaml_order": { + "id_before": "data_cleaning_code_url", + "id_after": "raw_files_url" + }, + "allOf": [ + { + "$ref": "#/definitions/PartnerOrganization" + } + ] + }, + "raw_files_url": { + "title": "Url dos Dados Originais", + "description": "Url dos dados originais no GCP Storage.", + "yaml_order": { + "id_before": "partner_organization", + "id_after": "auxiliary_files_url" + }, + "type": "string" + }, + "auxiliary_files_url": { + "title": "Url dos Arquivos Auxiliares", + "description": "Url dos arquivos auxiliares no GCP Storage.", + "yaml_order": { + "id_before": "raw_files_url", + "id_after": "architecture_url" + }, + "type": "string" + }, + "architecture_url": { + "title": "Url da Tabela de Arquitetura", + "description": "Url da tabela de arquitetura no GCP Storage.", + "yaml_order": { + "id_before": "auxiliary_files_url", + "id_after": "source_bucket_name" + }, + "type": "string" + }, + "source_bucket_name": { + "title": "Nome do Bucket Fonte no GCP", + "yaml_order": { + "id_before": "architecture_url", + "id_after": "project_id_prod" + }, + "type": "string" + }, + "project_id_prod": { + "title": "ID do Projeto de Produção no GCP", + "yaml_order": { + "id_before": "source_bucket_name", + "id_after": "project_id_staging" + }, + "type": "string" + }, + "project_id_staging": { + "title": "ID do Projeto de Staging no GCP", + "yaml_order": { + "id_before": "project_id_prod", + "id_after": "partitions" + }, + "type": "string" + }, + "partitions": { + "title": "Partições", + "description": "Liste as colunas da tabela que representam partições.\nNão esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery.\nIsso poupará muito tempo e dinheiro às pessoas utilizando essa tabela.\nSe não houver partições, não modifique abaixo.", + "yaml_order": { + "id_before": "project_id_staging", + "id_after": "columns" + }, + "type": "array", + "items": { + "type": "string" + } + }, + "uncompressed_file_size": { + "title": "Tamanho do Arquivo 
Não-Comprimido (em bytes)", + "type": "integer" + }, + "compressed_file_size": { + "title": "Tamanho do Arquivo Comprimido (em bytes)", + "type": "integer" + }, + "columns": { + "title": "Colunas", + "description": "Quais são as colunas? Certifique-se de escrever uma boa descrição, as pessoas vão gostar\npara saber sobre o que é a coluna.\nAdicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando\ninicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que\npreencherá automaticamente as colunas.\nAlém disso, você deve adicionar as colunas de partição aqui e definir is_partition como True.", + "yaml_order": { + "id_before": "partitions", + "id_after": "number_rows" + }, + "type": "array", + "items": { + "$ref": "#/definitions/BdmColumns" + } + }, + "name": { + "title": "Name", + "description": "Nome da tabela.", + "yaml_order": { + "id_before": "table_id", + "id_after": "description" + }, + "type": "string" + }, + "number_rows": { + "title": "Número de Linhas da Tabela", + "yaml_order": { + "id_before": "columns", + "id_after": "metadata_modified" + }, + "type": "integer" + } + }, + "required": [ + "name", + "position", + "resource_type", + "dataset_id", + "table_id" + ], + "definitions": { + "SpatialCoverageArea": { + "title": "SpatialCoverageArea", + "type": "string" + }, + "TemporalCoverageEnum": { + "title": "TemporalCoverageEnum", + "type": "array", + "items": { + "type": "string" + } + }, + "TimeUnitEnum": { + "title": "TimeUnitEnum", + "description": "An enumeration.", + "enum": [ + "second", + "minute", + "hour", + "day", + "week", + "month", + "quarter", + "semester", + "one_year", + "two_years", + "three_years", + "four_years", + "five_years", + "ten_years", + "unique", + "recurring", + "uncertain", + "other" + ], + "type": "string" + }, + "CountryEnum": { + "title": "CountryEnum", + "description": "An enumeration.", + "enum": [ + "br", + "ar", + "bo", + "cl", + "co", + "ca", + "us", + 
"mx", + "de", + "es", + "fr", + "it", + "pt", + "gb", + "ru", + "cn", + "th", + "jp", + "my", + "id", + "za", + "au" + ], + "type": "string" + }, + "EntityDateTimeEnum": { + "title": "EntityDateTimeEnum", + "description": "An enumeration.", + "enum": [ + "year", + "semester", + "quarter", + "bimester", + "month", + "week", + "day", + "hour", + "minute", + "second", + "date", + "time" + ], + "type": "string" + }, + "EntitySpatialEnum": { + "title": "EntitySpatialEnum", + "description": "An enumeration.", + "enum": [ + "continent", + "country", + "region", + "state", + "district", + "county", + "municipality", + "city", + "village", + "neighborhood", + "zip_code", + "census_tract" + ], + "type": "string" + }, + "EntityIndividualEnum": { + "title": "EntityIndividualEnum", + "description": "An enumeration.", + "enum": [ + "person", + "household", + "name", + "animal", + "plant" + ], + "type": "string" + }, + "EntityEstablishmentEnum": { + "title": "EntityEstablishmentEnum", + "description": "An enumeration.", + "enum": [ + "agency", + "protected_area", + "band", + "library", + "notary_office", + "school", + "legislature", + "police_station", + "company", + "station", + "stadium", + "terrorist_group", + "hospital", + "church", + "property", + "ministry", + "museum", + "construction", + "ngo", + "prison", + "team", + "court", + "store" + ], + "type": "string" + }, + "EntityPoliticsEnum": { + "title": "EntityPoliticsEnum", + "description": "An enumeration.", + "enum": [ + "agreement", + "speech", + "election", + "caucus", + "law", + "party", + "poll", + "vote" + ], + "type": "string" + }, + "EntityScienceEnum": { + "title": "EntityScienceEnum", + "description": "An enumeration.", + "enum": [ + "article", + "citation", + "domain", + "document", + "iceberg", + "book", + "newspaper", + "drug", + "patent", + "journal", + "word", + "post", + "langugage", + "crs", + "page", + "protein", + "meteor", + "terrain", + "typo" + ], + "type": "string" + }, + "EntityEconomicsEnum": { + 
"title": "EntityEconomicsEnum", + "description": "An enumeration.", + "enum": [ + "contract", + "donation", + "amendment", + "expenditure", + "item", + "grant", + "procurement", + "product", + "transaction", + "transfer", + "bill", + "occupation", + "sector" + ], + "type": "string" + }, + "EntityEducationEnum": { + "title": "EntityEducationEnum", + "description": "An enumeration.", + "enum": [ + "scholarship", + "exam" + ], + "type": "string" + }, + "EntityEventEnum": { + "title": "EntityEventEnum", + "description": "An enumeration.", + "enum": [ + "alert", + "attack", + "audit", + "act", + "concert", + "disinvitation", + "disaster", + "war", + "territorial_change", + "birth", + "death", + "request", + "protest", + "match", + "sanction" + ], + "type": "string" + }, + "EntityArtEnum": { + "title": "EntityArtEnum", + "description": "An enumeration.", + "enum": [ + "album", + "movie", + "photo", + "song", + "statue", + "painting", + "poem" + ], + "type": "string" + }, + "EntityInfrastructureEnum": { + "title": "EntityInfrastructureEnum", + "description": "An enumeration.", + "enum": [ + "dam", + "satellitte", + "street_road", + "roller_coaster" + ], + "type": "string" + }, + "EntityTransportationEnum": { + "title": "EntityTransportationEnum", + "description": "An enumeration.", + "enum": [ + "automobile", + "train", + "aircraft", + "ship" + ], + "type": "string" + }, + "EntitySecurityEnum": { + "title": "EntitySecurityEnum", + "description": "An enumeration.", + "enum": [ + "gun" + ], + "type": "string" + }, + "EntityDemographicEnum": { + "title": "EntityDemographicEnum", + "description": "An enumeration.", + "enum": [ + "age", + "race", + "sex" + ], + "type": "string" + }, + "EntityHistoryEnum": { + "title": "EntityHistoryEnum", + "description": "An enumeration.", + "enum": [ + "empire" + ], + "type": "string" + }, + "EntityOtherEnum": { + "title": "EntityOtherEnum", + "description": "An enumeration.", + "enum": [ + "other" + ], + "type": "string" + }, + 
"EntityImageEnum": { + "title": "EntityImageEnum", + "description": "An enumeration.", + "enum": [ + "pixel", + "polygon" + ], + "type": "string" + }, + "ObservationLevel": { + "title": "ObservationLevel", + "type": "object", + "properties": { + "country": { + "title": "País", + "description": "País da entidade. Deixar nulo se entidade for internacional ou não-espacial.\nOpções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "allOf": [ + { + "$ref": "#/definitions/CountryEnum" + } + ] + }, + "entity": { + "title": "Entidade", + "description": "Opções em 'https://basedosdados.org/api/3/action/bd_available_options'", + "anyOf": [ + { + "$ref": "#/definitions/EntityDateTimeEnum" + }, + { + "$ref": "#/definitions/EntitySpatialEnum" + }, + { + "$ref": "#/definitions/EntityIndividualEnum" + }, + { + "$ref": "#/definitions/EntityEstablishmentEnum" + }, + { + "$ref": "#/definitions/EntityPoliticsEnum" + }, + { + "$ref": "#/definitions/EntityScienceEnum" + }, + { + "$ref": "#/definitions/EntityEconomicsEnum" + }, + { + "$ref": "#/definitions/EntityEducationEnum" + }, + { + "$ref": "#/definitions/EntityEventEnum" + }, + { + "$ref": "#/definitions/EntityArtEnum" + }, + { + "$ref": "#/definitions/EntityInfrastructureEnum" + }, + { + "$ref": "#/definitions/EntityTransportationEnum" + }, + { + "$ref": "#/definitions/EntitySecurityEnum" + }, + { + "$ref": "#/definitions/EntityDemographicEnum" + }, + { + "$ref": "#/definitions/EntityHistoryEnum" + }, + { + "$ref": "#/definitions/EntityOtherEnum" + }, + { + "$ref": "#/definitions/EntityImageEnum" + } + ] + }, + "columns": { + "title": "Colunas identificadoras", + "description": "Colunas identificadoras da entidade", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "LastUpdated": { + "title": "LastUpdated", + "type": "object", + "properties": { + "metadata": { + "title": "Metadados", + "type": "string" + }, + "data": { + "title": "Dados", + "type": "string" + }, + "release": { + "title": 
"Dados Originais", + "type": "string" + } + } + }, + "PublishedBy": { + "title": "PublishedBy", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "type": "string" + }, + "email": { + "title": "Email", + "type": "string" + }, + "github_user": { + "title": "Usuário Github", + "type": "string" + }, + "ckan_user": { + "title": "Usuário CKAN", + "type": "string" + }, + "website": { + "title": "Website", + "type": "string" + } + } + }, + "DataCleanedBy": { + "title": "DataCleanedBy", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "type": "string" + }, + "email": { + "title": "Email", + "type": "string" + }, + "github_user": { + "title": "Usuário Github", + "type": "string" + }, + "ckan_user": { + "title": "Usuário CKAN", + "type": "string" + }, + "website": { + "title": "Website", + "type": "string" + } + } + }, + "PartnerOrganization": { + "title": "PartnerOrganization", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome completo", + "type": "string" + }, + "organization_id": { + "title": "ID Organização", + "description": "ID Organização - CKAN", + "type": "string" + } + } + }, + "BigQueryTypeEnum": { + "title": "BigQueryTypeEnum", + "description": "An enumeration.", + "enum": [ + "array", + "boolean", + "date", + "datetime", + "float64", + "geography", + "int64", + "numeric", + "string", + "struct", + "time", + "timestamp" + ], + "type": "string" + }, + "YesNoEnum": { + "title": "YesNoEnum", + "description": "An enumeration.", + "enum": [ + "yes", + "no" + ], + "type": "string" + }, + "DirectoryEnum": { + "title": "DirectoryEnum", + "description": "An enumeration.", + "enum": [ + "br_bd_diretorios_africa_sul", + "br_bd_diretorios_alemanha", + "br_bd_diretorios_argentina", + "br_bd_diretorios_australia", + "br_bd_diretorios_bolivia", + "br_bd_diretorios_brasil", + "br_bd_diretorios_canada", + "br_bd_diretorios_chile", + "br_bd_diretorios_china", + "br_bd_diretorios_colombia", + 
"br_bd_diretorios_data_tempo", + "br_bd_diretorios_dinamarca", + "br_bd_diretorios_espanha", + "br_bd_diretorios_franca", + "br_bd_diretorios_india", + "br_bd_diretorios_inglaterra", + "br_bd_diretorios_eua", + "br_bd_diretorios_mexico", + "br_bd_diretorios_mundo", + "br_bd_diretorios_noruega", + "br_bd_diretorios_peru", + "br_bd_diretorios_portugal", + "br_bd_diretorios_suecia", + "br_bd_diretorios_suica" + ], + "type": "string" + }, + "DirectoryColumn": { + "title": "DirectoryColumn", + "type": "object", + "properties": { + "dataset_id": { + "title": "ID Conjunto", + "allOf": [ + { + "$ref": "#/definitions/DirectoryEnum" + } + ] + }, + "table_id": { + "title": "ID Tabela", + "type": "string" + }, + "column_name": { + "title": "Nome Coluna", + "type": "string" + } + } + }, + "BdmColumns": { + "title": "BdmColumns", + "type": "object", + "properties": { + "name": { + "title": "Nome", + "description": "Nome em produção", + "yaml_order": { + "id_before": null, + "id_after": "bigquery_type" + }, + "type": "string" + }, + "bigquery_type": { + "title": "Tipo no BigQuery", + "description": "Tipo no BigQuery.\nOpções: string, int64, float64, date, time, geometry.Ver https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types.", + "yaml_order": { + "id_before": "name", + "id_after": "description" + }, + "allOf": [ + { + "$ref": "#/definitions/BigQueryTypeEnum" + } + ] + }, + "description": { + "title": "Descrição", + "description": "Descrição", + "yaml_order": { + "id_before": "bigquery_type", + "id_after": "temporal_coverage" + }, + "type": "string" + }, + "temporal_coverage": { + "title": "Cobertura Temporal", + "description": "Anos cobertos pela tabela.\nPreenchido como lista de intervalos sem repetir os metadados da tabela.Exemplo: 2001(1)2010, ou (1)2020, ou (1).", + "yaml_order": { + "id_before": "description", + "id_after": "covered_by_dictionary" + }, + "allOf": [ + { + "$ref": "#/definitions/TemporalCoverageEnum" + } + ] + }, + 
"covered_by_dictionary": { + "title": "Coberta por um Dicionário", + "description": "A coluna precisa de dicionário?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "temporal_coverage", + "id_after": "directory_column" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "directory_column": { + "title": "Coluna Correspondente nos Diretórios", + "description": "Chave primária nos diretórios correspondente à coluna.", + "yaml_order": { + "id_before": "covered_by_dictionary", + "id_after": "measurement_unit" + }, + "allOf": [ + { + "$ref": "#/definitions/DirectoryColumn" + } + ] + }, + "measurement_unit": { + "title": "Unidade de Medida", + "description": "Qual é a unidade de medida da coluna?\nEscreva a fórmula matemática baseada nas chaves de unidades básicas permitidas em https://basedosdados.org/api/3/action/bd_available_options na seção Measurement Unit.\nExemplos: 'kilometer^2', 'meter^3 / second', '1000 * person', 'gigawatt'.", + "yaml_order": { + "id_before": "directory_column", + "id_after": "has_sensitive_data" + }, + "type": "string" + }, + "has_sensitive_data": { + "title": "Contém Dados Sensíveis (LGPD)", + "description": "A coluna contém dados sensíveis, como definido pela Lei Geral de Proteção de Dados (LGPD)?\nOpções: yes, no.", + "default": "no", + "yaml_order": { + "id_before": "measurement_unit", + "id_after": "observations" + }, + "allOf": [ + { + "$ref": "#/definitions/YesNoEnum" + } + ] + }, + "observations": { + "title": "Observações", + "description": "Informações sobre a coluna: arquitetura, decisões de limpeza, etc.", + "yaml_order": { + "id_before": "has_sensitive_data", + "id_after": "is_in_staging" + }, + "type": "string" + }, + "is_in_staging": { + "title": "Está em Staging", + "description": "A coluna está na tabela staging?\nOpções: True, False", + "default": true, + "yaml_order": { + "id_before": "observations", + "id_after": "is_partition" + }, + "type": "boolean" + }, + "is_partition": { + 
"title": "É Partição", + "description": "A coluna é uma partição?\nOpções: True, False", + "default": false, + "yaml_order": { + "id_before": "is_in_staging", + "id_after": null + }, + "type": "boolean" + } + }, + "required": [ + "name" + ] + } + } + } +} \ No newline at end of file diff --git a/python-package/basedosdados/upload/base.py b/python-package/basedosdados/upload/base.py index d786de488..b4933c386 100644 --- a/python-package/basedosdados/upload/base.py +++ b/python-package/basedosdados/upload/base.py @@ -1,29 +1,32 @@ """ Module for manage dataset using local credentials and config files """ -# pylint: disable=line-too-long, invalid-name, too-many-arguments, invalid-envvar-value,line-too-long -from pathlib import Path -import sys -from os import getenv -import shutil -import warnings + import base64 import json +import shutil +import sys +import warnings from functools import lru_cache +from os import getenv -from google.cloud import bigquery, storage +# pylint: disable=line-too-long, invalid-name, too-many-arguments, invalid-envvar-value,line-too-long +from pathlib import Path +from typing import Dict, List, Union + +import googleapiclient.discovery +import tomlkit +from google.cloud import bigquery, bigquery_connection_v1, storage from google.oauth2 import service_account from loguru import logger -import yaml -from jinja2 import Template -import tomlkit +from basedosdados.backend import Backend from basedosdados.constants import config, constants warnings.filterwarnings("ignore") -class Base: +class Base: # pylint: disable=too-many-instance-attributes """ Base class for all datasets """ @@ -31,9 +34,7 @@ class Base: def __init__( self, config_path=".basedosdados", - templates=None, bucket_name=None, - metadata_path=None, overwrite_cli_config=False, ): """ @@ -46,15 +47,20 @@ def __init__( else Path.home() / config_path ) - self.config_path = config_path + self.config_path = config_path self._init_config(force=overwrite_cli_config) self.config = 
self._load_config() self._config_log(config.verbose) - - self.templates = Path(templates or self.config["templates_path"]) - self.metadata_path = Path(metadata_path or self.config["metadata_path"]) self.bucket_name = bucket_name or self.config["bucket_name"] self.uri = f"gs://{self.bucket_name}" + "/staging/{dataset}/{table}/*" + self._backend = Backend(self.config.get("api", {}).get("url", None)) + + @property + def backend(self): + """ + Backend class + """ + return self._backend @staticmethod def _decode_env(env: str) -> str: @@ -75,12 +81,21 @@ def _load_credentials(self, mode: str): ) ) return service_account.Credentials.from_service_account_info( - info, scopes=["https://www.googleapis.com/auth/cloud-platform"] + info, + scopes=[ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ], ) return service_account.Credentials.from_service_account_file( self.config["gcloud-projects"][mode]["credentials_path"], - scopes=["https://www.googleapis.com/auth/cloud-platform"], + scopes=[ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/drive", + "https://www.googleapis.com/auth/bigquery", + ], ) @property @@ -95,27 +110,22 @@ def client(self): credentials=self._load_credentials("prod"), project=self.config["gcloud-projects"]["prod"]["name"], ), + bigquery_connection_prod=bigquery_connection_v1.ConnectionServiceClient( + credentials=self._load_credentials("prod") + ), bigquery_staging=bigquery.Client( credentials=self._load_credentials("staging"), project=self.config["gcloud-projects"]["staging"]["name"], ), + bigquery_connection_staging=bigquery_connection_v1.ConnectionServiceClient( + credentials=self._load_credentials("staging") + ), storage_staging=storage.Client( credentials=self._load_credentials("staging"), project=self.config["gcloud-projects"]["staging"]["name"], ), ) - @property - def main_vars(self): - """ - Variables for main templates 
- """ - return dict( - templates=self.templates, - metadata_path=self.metadata_path, - bucket_name=self.bucket_name, - ) - @staticmethod def _input_validator(context, default="", with_lower=True): """ @@ -146,7 +156,6 @@ def _selection_yn( """ while True: - res = self._input_validator(first_question, default_yn, with_lower) if res == "y": @@ -202,15 +211,12 @@ def _init_config(self, force): credentials_folder = self.config_path / "credentials" credentials_folder.mkdir(exist_ok=True, parents=True) - # Create template folder - self._refresh_templates() - # If environments are set but no files exist if ( (not config_file.exists()) - and (getenv(constants.ENV_CONFIG.value)) - and (getenv(constants.ENV_CREDENTIALS_PROD.value)) - and (getenv(constants.ENV_CREDENTIALS_STAGING.value)) + and (getenv(constants.ENV_CONFIG.value)) # noqa + and (getenv(constants.ENV_CREDENTIALS_PROD.value)) # noqa + and (getenv(constants.ENV_CREDENTIALS_STAGING.value)) # noqa ): # Create basedosdados files from envs with open(config_file, "w", encoding="utf-8") as f: @@ -224,7 +230,6 @@ def _init_config(self, force): f.close() if (not config_file.exists()) or (force): - # Load config file c_file = tomlkit.parse( (Path(__file__).resolve().parents[1] / "configs" / "config.toml") @@ -240,30 +245,13 @@ def _init_config(self, force): "[press enter to continue]" ) - ############# STEP 1 - METADATA PATH ####################### - - metadata_path = self._selection_yn( - first_question=( - "\n********* STEP 1 **********\n" - "Where are you going to save the metadata files of " - "datasets and tables?\n" - f"Is it at the current path ({Path.cwd()})? 
[Y/n]\n" - ), - default_yn="y", - default_return=Path.cwd(), - no_question=("\nWhere would you like to save it?\n" "metadata path: "), - with_lower=False, - ) - - c_file["metadata_path"] = str(Path(metadata_path) / "bases") - - ############# STEP 2 - CREDENTIALS PATH ###################### + # STEP 1 - CREDENTIALS PATH # credentials_path = self.config_path / "credentials" credentials_path = Path( self._selection_yn( first_question=( - "\n********* STEP 2 **********\n" + "\n********* STEP 1 **********\n" "Where do you want to save your Google Cloud credentials?\n" f"Is it at the {credentials_path}? [Y/n]\n" ), @@ -276,9 +264,9 @@ def _init_config(self, force): ) ) - ############# STEP 3 - STAGING CREDS. ####################### + # STEP 2 - STAGING CREDS. # project_staging = self._input_validator( - "\n********* STEP 3 **********\n" + "\n********* STEP 2 **********\n" "What is the Google Cloud Project that you are going to use " "to upload and treat data?\nIt might be something with 'staging'" "in the name. If you just have one project, put its name.\n" @@ -293,11 +281,11 @@ def _init_config(self, force): ) c_file["gcloud-projects"]["staging"]["name"] = project_staging - ############# STEP 4 - PROD CREDS. ####################### + # STEP 3 - PROD CREDS. # project_prod = self._selection_yn( first_question=( - "\n********* STEP 4 **********\n" + "\n********* STEP 3 **********\n" "Is your production project the same as the staging? 
[y/N]\n" ), default_yn="n", @@ -322,10 +310,10 @@ def _init_config(self, force): ) c_file["gcloud-projects"]["prod"]["name"] = project_prod - ############# STEP 5 - BUCKET NAME ####################### + # STEP 4 - BUCKET NAME # bucket_name = self._input_validator( - "\n********* STEP 5 **********\n" + "\n********* STEP 4 **********\n" "What is the Storage Bucket that you are going to be using to save the data?\n" "Bucket name [basedosdados]: ", "basedosdados", @@ -333,9 +321,16 @@ def _init_config(self, force): c_file["bucket_name"] = bucket_name - ############# STEP 6 - SET TEMPLATES ####################### + # STEP 5 - CONFIGURE API # - c_file["templates_path"] = str(self.config_path / "templates") + api_base_url = self._input_validator( + "\n********* STEP 5 **********\n" + "What is the URL of the API that you are going to use?\n" + "API url [https://staging.api.basedosdados.org/api/v1/graphql]: ", + "https://staging.api.basedosdados.org/api/v1/graphql", + ) + + c_file["api"]["url"] = api_base_url config_file.open("w", encoding="utf-8").write(tomlkit.dumps(c_file)) @@ -360,31 +355,9 @@ def _load_config(self): if getenv(constants.ENV_CONFIG.value): return tomlkit.parse(self._decode_env(constants.ENV_CONFIG.value)) return tomlkit.parse( - (self.config_path / "config.toml") - .open("r", encoding="utf-8") - .read() + (self.config_path / "config.toml").open("r", encoding="utf-8").read() ) - @staticmethod - def _load_yaml(file): - """ - Loads a yaml file - """ - - try: - return yaml.load(open(file, "r", encoding="utf-8"), Loader=yaml.SafeLoader) - except FileNotFoundError: - return None - - def _render_template(self, template_file, kargs): - """ - Render a template file - """ - - return Template( - (self.templates / template_file).open("r", encoding="utf-8").read() - ).render(**kargs) - @staticmethod def _check_mode(mode): """ @@ -408,12 +381,86 @@ def _check_mode(mode): f'{",".join(ACCEPTED_MODES)}' ) - def _refresh_templates(self): + def _get_project_id(self, mode: 
str) -> str: """ - Refreshes the templates + Get the project ID. """ - shutil.rmtree((self.config_path / "templates"), ignore_errors=True) - shutil.copytree( - (Path(__file__).resolve().parents[1] / "configs" / "templates"), - (self.config_path / "templates"), + return self.config["gcloud-projects"][mode]["name"] + + def _get_project_number(self, mode: str) -> str: + """ + Get the project number from project ID. + """ + credentials = self._load_credentials(mode) + crm_service = googleapiclient.discovery.build( + "cloudresourcemanager", "v1", credentials=credentials ) + project_id = self._get_project_id(mode) + # pylint: disable=no-member + return ( + crm_service.projects().get(projectId=project_id).execute()["projectNumber"] + ) + + def _get_project_iam_policy( + self, mode: str + ) -> Dict[str, Union[str, int, List[Dict[str, Union[str, List[str]]]]]]: + """ + Get the project IAM policy. + """ + credentials = self._load_credentials(mode) + service = googleapiclient.discovery.build( + "cloudresourcemanager", "v1", credentials=credentials + ) + policy = ( + service.projects() # pylint: disable=no-member + .getIamPolicy( + resource=self._get_project_id(mode), + body={"options": {"requestedPolicyVersion": 1}}, + ) + .execute() + ) + return policy + + def _set_project_iam_policy( + self, + policy: Dict[str, Union[str, int, List[Dict[str, Union[str, List[str]]]]]], + mode: str, + ): + """ + Set the project IAM policy. + """ + credentials = self._load_credentials(mode) + service = googleapiclient.discovery.build( + "cloudresourcemanager", "v1", credentials=credentials + ) + service.projects().setIamPolicy( # pylint: disable=no-member + resource=self._get_project_id(mode), body={"policy": policy} + ).execute() + + def _grant_role(self, role: str, member: str, mode: str): + """ + Grant a role to a member. 
+ """ + policy = self._get_project_iam_policy(mode) + try: + binding = next(b for b in policy["bindings"] if b["role"] == role) + except StopIteration: + binding = {"role": role, "members": []} + policy["bindings"].append(binding) + if member not in binding["members"]: + binding["members"].append(member) + self._set_project_iam_policy(policy, mode) + + def _revoke_role(self, role: str, member: str, mode: str): + """ + Revoke a role from a member. + """ + policy = self._get_project_iam_policy(mode) + try: + binding = next(b for b in policy["bindings"] if b["role"] == role) + except StopIteration: + return + else: + if member in binding["members"]: + binding["members"].remove(member) + self._set_project_iam_policy(policy, mode) diff --git a/python-package/basedosdados/upload/connection.py b/python-package/basedosdados/upload/connection.py new file mode 100644 index 000000000..d88cb267a --- /dev/null +++ b/python-package/basedosdados/upload/connection.py @@ -0,0 +1,150 @@ +""" +Module for managing BigQuery Connections. +""" +# pylint: disable=line-too-long, fixme, invalid-name,line-too-long,unnecessary-lambda-assignment + +from typing import Union + +import google.auth +from google.cloud.bigquery_connection_v1.types import CloudResourceProperties +from google.cloud.bigquery_connection_v1.types.connection import ( + Connection as BQConnection, +) +from google.cloud.bigquery_connection_v1.types.connection import ( + CreateConnectionRequest, + DeleteConnectionRequest, + GetConnectionRequest, +) + +from basedosdados.upload.base import Base + + +class Connection(Base): + """ + Manages BigQuery Connections. 
+ """ + + def __init__( # pylint: disable=too-many-arguments + self, + name: str, + location: str = None, + mode: str = "staging", + friendly_name: str = None, + description: str = None, + **kwargs, + ): + super().__init__(**kwargs) + self._name = name + self._location = location or "US" + self._mode = mode + self._friendly_name = friendly_name + self._description = description + self._project = self.config["gcloud-projects"][self._mode]["name"] + self._parent = f"projects/{self._project}/locations/{self._location}" + + @property + def exists(self) -> bool: + """ + Checks if connection exists. + """ + if self.connection: + return True + return False + + @property + def connection(self) -> Union[BQConnection, None]: + """ + Returns connection object. + """ + client = self.client[f"bigquery_connection_{self._mode}"] + request = GetConnectionRequest(name=f"{self._parent}/connections/{self._name}") + try: + return client.get_connection(request=request) + except google.api_core.exceptions.NotFound: + return None + except Exception as e: + raise e + + @property + def connection_id(self) -> str: + """ + Returns the connection id. The format is: + .. + """ + project_number = self._get_project_number(self._mode) + return f"{project_number}.{self._location.lower()}.{self._name}" + + @property + def service_account(self) -> str: + """ + Returns the service account associated with the connection. + """ + conn = self.connection + if conn: + return conn.cloud_resource.service_account_id # pylint: disable=no-member + raise ValueError("Connection does not exist.") + + def create(self): + """ + Creates a new connection. 
+ """ + client = self.client[f"bigquery_connection_{self._mode}"] + request = CreateConnectionRequest( + parent=self._parent, + connection_id=self._name, + connection=BQConnection( + name=self._name, + friendly_name=self._friendly_name or self._name, + description=self._description or self._name, + cloud_resource=CloudResourceProperties(), + ), + ) + client.create_connection(request=request) + + def set_biglake_permissions(self): + """ + Grants all needed roles to the connection service account to be able to + access BigLake: + + - roles/storage.objectViewer (for staging) + """ + try: + self._grant_role( + role="roles/storage.objectViewer", + member=f"serviceAccount:{self.service_account}", + mode=self._mode, + ) + except Exception as e: + error_message = 'Failed to grant "roles/storage.objectViewer" role to ' + error_message += f"service account {self.service_account} " + error_message += f"for project {self._project}. Maybe you don't have " + error_message += "permissions to grant roles?" + raise Exception(error_message) from e + + def revoke_biglake_permissions(self): + """ + Revokes all roles from the connection service account. + """ + try: + self._revoke_role( + role="roles/storage.objectViewer", + member=f"serviceAccount:{self.service_account}", + mode=self._mode, + ) + except Exception as e: + error_message = 'Failed to revoke "roles/storage.objectViewer" role from ' + error_message += f"service account {self.service_account} " + error_message += f"for project {self._project}. Maybe you don't have " + error_message += "permissions to revoke roles?" + raise Exception(error_message) from e + + def delete(self): + """ + Deletes a connection. 
+ """ + self.revoke_biglake_permissions() + client = self.client[f"bigquery_connection_{self._mode}"] + request = DeleteConnectionRequest( + name=f"{self._parent}/connections/{self._name}" + ) + client.delete_connection(request=request) diff --git a/python-package/basedosdados/upload/dataset.py b/python-package/basedosdados/upload/dataset.py index 9279299bc..380443483 100644 --- a/python-package/basedosdados/upload/dataset.py +++ b/python-package/basedosdados/upload/dataset.py @@ -1,15 +1,14 @@ """ Module for manage dataset to the server. """ -# pylint: disable=line-too-long, fixme, invalid-name,line-too-long,unnecessary-lambda-assignment -from pathlib import Path -from loguru import logger +# pylint: disable=line-too-long, fixme, invalid-name,line-too-long +from functools import lru_cache -from google.cloud import bigquery from google.api_core.exceptions import Conflict +from google.cloud import bigquery +from loguru import logger from basedosdados.upload.base import Base -from basedosdados.upload.metadata import Metadata class Dataset(Base): @@ -19,111 +18,62 @@ class Dataset(Base): def __init__(self, dataset_id, **kwargs): super().__init__(**kwargs) - self.dataset_id = dataset_id.replace("-", "_") - self.dataset_folder = Path(self.metadata_path / self.dataset_id) - self.metadata = Metadata(self.dataset_id, **kwargs) @property + @lru_cache def dataset_config(self): """ Dataset config file. """ - - return self._load_yaml( - self.metadata_path / self.dataset_id / "dataset_config.yaml" - ) + return self.backend.get_dataset_config(self.dataset_id) def _loop_modes(self, mode="all"): """ Loop modes. 
""" - mode = ["prod", "staging"] if mode == "all" else [mode] - dataset_tag = lambda m: f"_{m}" if m == "staging" else "" + def dataset_tag(m): + return f"_{m}" if m == "staging" else "" + mode = ["prod", "staging"] if mode == "all" else [mode] return ( { "client": self.client[f"bigquery_{m}"], "id": f"{self.client[f'bigquery_{m}'].project}.{self.dataset_id}{dataset_tag(m)}", + "mode": m, } for m in mode ) - @staticmethod - def _setup_dataset_object(dataset_id, location=None): + def _setup_dataset_object(self, dataset_id, location=None, mode="staging"): """ Setup dataset object. """ dataset = bigquery.Dataset(dataset_id) + if mode == "staging": + dataset_path = dataset_id.replace("_staging", "") + description = f"staging dataset for `{dataset_path}`" + labels = {"staging": True} + else: + try: + description = self.dataset_config.get("descriptionPt", "") + labels = { + tag.get("namePt"): True for tag in self.dataset_config.get("tags") + } + except BaseException: + logger.warning( + f"dataset {dataset_id} does not have a description in the API." + ) + description = "description not available in the API." + labels = {} - ## TODO: not being used since 1.6.0 - need to redo the description tha goes to bigquery - dataset.description = "Para saber mais acesse https://basedosdados.org/" - # dataset.description = self._render_template( - # Path("dataset/dataset_description.txt"), self.dataset_config - # ) - + dataset.description = description + dataset.labels = labels dataset.location = location - return dataset - def _write_readme_file(self): - """ - Write README.md file. - """ - - readme_content = ( - f"Como capturar os dados de {self.dataset_id}?\n\nPara cap" - f"turar esses dados, basta verificar o link dos dados orig" - f"inais indicado em dataset_config.yaml no item website.\n" - f"\nCaso tenha sido utilizado algum código de captura ou t" - f"ratamento, estes estarão contidos em code/. 
Se o dado pu" - f"blicado for em sua versão bruta, não existirá a pasta co" - f"de/.\n\nOs dados publicados estão disponíveis em: https:" - f"//basedosdados.org/dataset/{self.dataset_id.replace('_','-')}" - ) - - readme_path = Path(self.metadata_path / self.dataset_id / "README.md") - - with open(readme_path, "w", encoding="utf-8") as readmefile: - readmefile.write(readme_content) - - def init(self, replace=False): - """Initialize dataset folder at metadata_path at `metadata_path/`. - - The folder should contain: - - * `dataset_config.yaml` - * `README.md` - - Args: - replace (str): Optional. Whether to replace existing folder. - - Raises: - FileExistsError: If dataset folder already exists and replace is False - """ - - # Create dataset folder - try: - self.dataset_folder.mkdir(exist_ok=replace, parents=True) - except FileExistsError as e: - raise FileExistsError( - f"Dataset {str(self.dataset_folder.stem)} folder does not exists. " - "Set replace=True to replace current files." - ) from e - - # create dataset_config.yaml with metadata - self.metadata.create(if_exists="replace") - - # create README.md file - self._write_readme_file() - - # Add code folder - (self.dataset_folder / "code").mkdir(exist_ok=replace, parents=True) - - return self - def publicize(self, mode="all", dataset_is_public=True): """Changes IAM configuration to turn BigQuery dataset public. 
@@ -133,7 +83,6 @@ def publicize(self, mode="all", dataset_is_public=True): """ for m in self._loop_modes(mode): - dataset = m["client"].get_dataset(m["id"]) entries = dataset.access_entries # TODO https://github.com/basedosdados/mais/pull/1020 @@ -171,13 +120,26 @@ def publicize(self, mode="all", dataset_is_public=True): ) dataset.access_entries = entries m["client"].update_dataset(dataset, ["access_entries"]) - logger.success( - " {object} {object_id}_{mode} was {action}!", - object_id=self.dataset_id, - mode=mode, - object="Dataset", - action="publicized", + logger.success( + " {object} {object_id}_{mode} was {action}!", + object_id=self.dataset_id, + mode=m["mode"], + object="Dataset", + action="publicized", + ) + + def exists(self, mode="staging"): + """ + Check if dataset exists. + """ + ref_dataset_id = ( + self.dataset_id if mode == "prod" else self.dataset_id + "_staging" ) + try: + ref = self.client[f"bigquery_{mode}"].get_dataset(ref_dataset_id) + except Exception: + ref = None + return bool(ref) def create( self, mode="all", if_exists="raise", dataset_is_public=True, location=None @@ -210,40 +172,38 @@ def create( Warning: Dataset already exists and if_exists is set to `raise` """ - if if_exists == "replace": - self.delete(mode) - elif if_exists == "update": - - self.update() - return - # Set dataset_id to the ID of the dataset to create. for m in self._loop_modes(mode): - - # Construct a full Dataset object to send to the API. - dataset_obj = self._setup_dataset_object(m["id"], location=location) + if if_exists == "replace": + self.delete(mode=m["mode"]) + elif if_exists == "update": + self.update(mode=m["mode"]) + continue # Send the dataset to the API for creation, with an explicit timeout. # Raises google.api_core.exceptions.Conflict if the Dataset already # exists within the project. try: - m["client"].create_dataset(dataset_obj) # Make an API request. 
- logger.success( - " {object} {object_id}_{mode} was {action}!", - object_id=self.dataset_id, - mode=mode, - object="Dataset", - action="created", - ) - + if not self.exists(mode=m["mode"]): + # Construct a full Dataset object to send to the API. + dataset_obj = self._setup_dataset_object( + dataset_id=m["id"], location=location, mode=m["mode"] + ) + m["client"].create_dataset(dataset_obj) # Make an API request. + logger.success( + " {object} {object_id}_{mode} was {action}!", + object_id=self.dataset_id, + mode=m["mode"], + object="Dataset", + action="created", + ) + # Make prod dataset public + self.publicize(dataset_is_public=dataset_is_public, mode=m["mode"]) except Conflict as e: if if_exists == "pass": - return + continue raise Conflict(f"Dataset {self.dataset_id} already exists") from e - # Make prod dataset public - self.publicize(dataset_is_public=dataset_is_public) - def delete(self, mode="all"): """Deletes dataset in BigQuery. Toogle mode to choose which dataset to delete. @@ -252,15 +212,14 @@ def delete(self, mode="all"): """ for m in self._loop_modes(mode): - m["client"].delete_dataset(m["id"], delete_contents=True, not_found_ok=True) - logger.info( - " {object} {object_id}_{mode} was {action}!", - object_id=self.dataset_id, - mode=mode, - object="Dataset", - action="deleted", - ) + logger.info( + " {object} {object_id}_{mode} was {action}!", + object_id=self.dataset_id, + mode=m["mode"], + object="Dataset", + action="deleted", + ) def update(self, mode="all", location=None): """Update dataset description. Toogle mode to choose which dataset to update. @@ -273,22 +232,18 @@ def update(self, mode="all", location=None): """ for m in self._loop_modes(mode): - # Send the dataset to the API to update, with an explicit timeout. # Raises google.api_core.exceptions.Conflict if the Dataset already # exists within the project. 
m["client"].update_dataset( - self._setup_dataset_object( - m["id"], - location=location, - ), + self._setup_dataset_object(m["id"], location=location, mode=m["mode"]), fields=["description"], ) # Make an API request. - logger.success( - " {object} {object_id}_{mode} was {action}!", - object_id=self.dataset_id, - mode=mode, - object="Dataset", - action="updated", - ) + logger.success( + " {object} {object_id}_{mode} was {action}!", + object_id=self.dataset_id, + mode=m["mode"], + object="Dataset", + action="updated", + ) diff --git a/python-package/basedosdados/upload/datatypes.py b/python-package/basedosdados/upload/datatypes.py index 423cdda3c..9c8a60b20 100644 --- a/python-package/basedosdados/upload/datatypes.py +++ b/python-package/basedosdados/upload/datatypes.py @@ -1,39 +1,70 @@ -''' +""" Class for define external and partiton configs for each datatype -''' +""" # pylint: disable=protected-access,line-too-long import csv -from google.cloud import bigquery import pandas as pd -import pandavro +from google.cloud import bigquery + +try: + import pandavro + + _avro_dependencies = True +except ImportError: + _avro_dependencies = False + +from basedosdados.exceptions import BaseDosDadosMissingDependencyException class Datatype: - ''' + """ Manage external and partition config - ''' - def __init__( + """ + + def __init__( # pylint: disable=too-many-arguments self, - table_obj, + dataset_id="", + table_id="", + schema=None, source_format="csv", + csv_skip_leading_rows=1, + csv_delimiter=",", + csv_allow_jagged_rows=False, mode="staging", + bucket_name=None, partitioned=False, + biglake_connection_id=None, ): - - self.table_obj = table_obj + self.dataset_id = dataset_id.replace("_staging", "") + self.schema = schema self.source_format = source_format + self.csv_delimiter = csv_delimiter + self.csv_skip_leading_rows = csv_skip_leading_rows + self.csv_allow_jagged_rows = csv_allow_jagged_rows self.mode = mode + self.uri = 
f"gs://{bucket_name}/staging/{self.dataset_id}/{table_id}/*" self.partitioned = partitioned + self.biglake_connection_id = biglake_connection_id - def header(self, data_sample_path): - ''' + def header(self, data_sample_path, csv_delimiter): + """ Retrieve the header of the data sample - ''' + """ if self.source_format == "csv": - return next(csv.reader(open(data_sample_path, "r", encoding="utf-8"))) + # Replace 'data_sample_path' with your actual file path + with open(data_sample_path, "r", encoding="utf-8") as csv_file: + csv_reader = csv.reader(csv_file, delimiter=csv_delimiter) + return next(csv_reader) + if self.source_format == "avro": + if not _avro_dependencies: + raise BaseDosDadosMissingDependencyException( + "Optional dependencies for handling AVRO files are not installed. " + 'Please install basedosdados with the "avro" extra, such as:' + "\n\npip install basedosdados[avro]" + ) dataframe = pandavro.read_avro(str(data_sample_path)) return list(dataframe.columns.values) if self.source_format == "parquet": @@ -44,29 +75,29 @@ def header(self, data_sample_path): ) def partition(self): - ''' + """ Configure the partitioning of the table - ''' + """ hive_partitioning = bigquery.external_config.HivePartitioningOptions() - hive_partitioning.mode = "AUTO" - hive_partitioning.source_uri_prefix = self.table_obj.uri.format( - dataset=self.table_obj.dataset_id, table=self.table_obj.table_id - ).replace("*", "") + hive_partitioning.mode = "STRINGS" + hive_partitioning.source_uri_prefix = self.uri.replace("*", "") return hive_partitioning @property def external_config(self): - ''' + """ Configure the external table - ''' + """ if self.source_format == "csv": _external_config = bigquery.ExternalConfig("CSV") - _external_config.options.skip_leading_rows = 1 + _external_config.options.skip_leading_rows = self.csv_skip_leading_rows _external_config.options.allow_quoted_newlines = True _external_config.options.allow_jagged_rows = True _external_config.autodetect = False - 
_external_config.schema = self.table_obj._load_schema(self.mode) + _external_config.schema = self.schema + _external_config.options.field_delimiter = self.csv_delimiter + _external_config.options.allow_jagged_rows = self.csv_allow_jagged_rows elif self.source_format == "avro": _external_config = bigquery.ExternalConfig("AVRO") elif self.source_format == "parquet": @@ -75,9 +106,14 @@ def external_config(self): raise NotImplementedError( "Base dos Dados just supports csv, avro and parquet files" ) - - _external_config.source_uris = f"gs://{self.table_obj.bucket_name}/staging/{self.table_obj.dataset_id}/{self.table_obj.table_id}/*" + _external_config.source_uris = self.uri if self.partitioned: _external_config.hive_partitioning = self.partition() + if self.biglake_connection_id: + _external_config.connection_id = self.biglake_connection_id + # When using BigLake tables, schema must be provided to the `Table` object, not the + # `ExternalConfig` object. + _external_config.schema = None + return _external_config diff --git a/python-package/basedosdados/upload/metadata.py b/python-package/basedosdados/upload/metadata.py deleted file mode 100644 index 8928100a9..000000000 --- a/python-package/basedosdados/upload/metadata.py +++ /dev/null @@ -1,749 +0,0 @@ -""" -Class to manage the metadata of datasets and tables -""" -# pylint: disable=fixme, invalid-name, redefined-builtin, too-many-arguments, undefined-loop-variable -from __future__ import annotations - -from copy import deepcopy -from functools import lru_cache -from loguru import logger - -import requests -from ckanapi import RemoteCKAN -from ckanapi.errors import NotAuthorized, ValidationError -from ruamel.yaml.comments import CommentedMap -from ruamel.yaml.compat import ordereddict -import ruamel.yaml as ryaml - -from basedosdados.exceptions import BaseDosDadosException -from basedosdados.upload.base import Base - - -class Metadata(Base): - """ - Manage metadata in CKAN backend. 
- """ - - def __init__(self, dataset_id, table_id=None, **kwargs): - super().__init__(**kwargs) - - self.table_id = table_id - self.dataset_id = dataset_id - - if self.table_id: - self.dataset_metadata_obj = Metadata(self.dataset_id, **kwargs) - - url = "https://basedosdados.org" - self.CKAN_API_KEY = self.config.get("ckan", {}).get("api_key") - self.CKAN_URL = self.config.get("ckan", {}).get("url", "") or url - - @property - def filepath(self) -> str: - """Build the dataset or table filepath""" - - filename = "dataset_config.yaml" - if self.table_id: - filename = f"{self.table_id}/table_config.yaml" - return self.metadata_path / self.dataset_id / filename - - @property - def local_metadata(self) -> dict: - """Load dataset or table local metadata""" - - if self.filepath.exists(): - with open(self.filepath, "r", encoding="utf-8") as file: - return ryaml.safe_load(file.read()) - return {} - - @property - def ckan_metadata(self) -> dict: - """Load dataset or table metadata from Base dos Dados CKAN""" - - ckan_dataset, ckan_table = self.ckan_metadata_extended - return ckan_table or ckan_dataset - - @property - def ckan_metadata_extended(self) -> dict: - """Load dataset and table metadata from Base dos Dados CKAN""" - - dataset_id = self.dataset_id.replace("_", "-") - url = f"{self.CKAN_URL}/api/3/action/package_show?id={dataset_id}" - - ckan_response = requests.get(url, timeout=10).json() - dataset = ckan_response.get("result") - - if not ckan_response.get("success"): - return {}, {} - - if self.table_id: - for resource in dataset["resources"]: - if resource["name"] == self.table_id: - return dataset, resource - - return dataset, {} - - @property - def owner_org(self): - """ - Build `owner_org` field for each use case: table, dataset, new - or existing. 
- """ - - # in case `self` refers to a CKAN table's metadata - if self.table_id and self.exists_in_ckan(): - return self.dataset_metadata_obj.ckan_metadata.get("owner_org") - - # in case `self` refers to a new table's metadata - if self.table_id and not self.exists_in_ckan(): - if self.dataset_metadata_obj.exists_in_ckan(): - return self.dataset_metadata_obj.ckan_metadata.get("owner_org") - # mock `owner_org` for validation - return "3626e93d-165f-42b8-bde1-2e0972079694" - - # for datasets, `owner_org` must come from the YAML file - organization_id = "".join(self.local_metadata.get("organization") or []) - url = f"{self.CKAN_URL}/api/3/action/organization_show?id={organization_id}" - response = requests.get(url, timeout=10).json() - - if not response.get("success"): - raise BaseDosDadosException("Organization not found") - - owner_org = response.get("result", {}).get("id") - - return owner_org - - @property - def ckan_data_dict(self) -> dict: - """Helper function that structures local metadata for validation""" - - ckan_dataset, ckan_table = self.ckan_metadata_extended - - metadata = { - "id": ckan_dataset.get("id"), - "name": ckan_dataset.get("name") or self.dataset_id.replace("_", "-"), - "type": ckan_dataset.get("type") or "dataset", - "title": self.local_metadata.get("title"), - "private": ckan_dataset.get("private") or False, - "owner_org": self.owner_org, - "resources": ckan_dataset.get("resources", []) - or [{"resource_type": "external_link", "name": ""}] - or [{"resource_type": "information_request", "name": ""}], - "groups": [ - {"name": group} for group in self.local_metadata.get("groups", []) or [] - ], - "tags": [ - {"name": tag} for tag in self.local_metadata.get("tags", []) or [] - ], - "organization": {"name": self.local_metadata.get("organization")}, - "extras": [ - { - "key": "dataset_args", - "value": { - "short_description": self.local_metadata.get( - "short_description" - ), - "description": self.local_metadata.get("description"), - "ckan_url": 
self.local_metadata.get("ckan_url"), - "github_url": self.local_metadata.get("github_url"), - }, - } - ], - } - - if self.table_id: - metadata["resources"] = [ - { - "id": ckan_table.get("id"), - "description": self.local_metadata.get("description"), - "name": self.local_metadata.get("table_id"), - "resource_type": ckan_table.get("resource_type") or "bdm_table", - "version": self.local_metadata.get("version"), - "dataset_id": self.local_metadata.get("dataset_id"), - "table_id": self.local_metadata.get("table_id"), - "spatial_coverage": self.local_metadata.get("spatial_coverage"), - "temporal_coverage": self.local_metadata.get("temporal_coverage"), - "update_frequency": self.local_metadata.get("update_frequency"), - "observation_level": self.local_metadata.get("observation_level"), - "last_updated": self.local_metadata.get("last_updated"), - "published_by": self.local_metadata.get("published_by"), - "data_cleaned_by": self.local_metadata.get("data_cleaned_by"), - "data_cleaning_description": self.local_metadata.get( - "data_cleaning_description" - ), - "data_cleaning_code_url": self.local_metadata.get( - "data_cleaning_code_url" - ), - "partner_organization": self.local_metadata.get( - "partner_organization" - ), - "raw_files_url": self.local_metadata.get("raw_files_url"), - "auxiliary_files_url": self.local_metadata.get( - "auxiliary_files_url" - ), - "architecture_url": self.local_metadata.get("architecture_url"), - "source_bucket_name": self.local_metadata.get("source_bucket_name"), - "project_id_prod": self.local_metadata.get("project_id_prod"), - "project_id_staging": self.local_metadata.get("project_id_staging"), - "partitions": self.local_metadata.get("partitions"), - "uncompressed_file_size": self.local_metadata.get( - "uncompressed_file_size" - ), - "compressed_file_size": self.local_metadata.get( - "compressed_file_size" - ), - "columns": self.local_metadata.get("columns"), - "metadata_modified": self.local_metadata.get("metadata_modified"), - 
"package_id": ckan_dataset.get("id"), - } - ] - - return metadata - - @property - @lru_cache(256) - def columns_schema(self) -> dict: - """Returns a dictionary with the schema of the columns""" - - url = f"{self.CKAN_URL}/api/3/action/bd_bdm_columns_schema" - - return requests.get(url, timeout=10).json().get("result") - - @property - @lru_cache(256) - def metadata_schema(self) -> dict: - """Get metadata schema from CKAN API endpoint""" - - if self.table_id: - table_url = f"{self.CKAN_URL}/api/3/action/bd_bdm_table_schema" - return requests.get(table_url, timeout=10).json().get("result") - - dataset_url = f"{self.CKAN_URL}/api/3/action/bd_dataset_schema" - return requests.get(dataset_url, timeout=10).json().get("result") - - def exists_in_ckan(self) -> bool: - """Check if Metadata object refers to an existing CKAN package or reso - urce. - - Returns: - bool: The existence condition of the metadata in CKAN. `True` if i - t exists, `False` otherwise. - """ - - if self.table_id: - url = f"{self.CKAN_URL}/api/3/action/bd_bdm_table_show?" - url += f"dataset_id={self.dataset_id}&table_id={self.table_id}" - else: - id = self.dataset_id.replace("_", "-") - # TODO: use `bd_bdm_dataset_show` when it's available for empty packages - url = f"{self.CKAN_URL}/api/3/action/package_show?id={id}" - - exists_in_ckan = requests.get(url, timeout=10).json().get("success") - - return exists_in_ckan - - def is_updated(self) -> bool: - """Check if a dataset or table is updated - - Returns: - bool: The update condition of local metadata. `True` if it corresp - onds to the most recent version of the given table or dataset's me - tadata in CKAN, `False` otherwise. 
- """ - - if not self.local_metadata.get("metadata_modified"): - return bool(not self.exists_in_ckan()) - ckan_modified = self.ckan_metadata.get("metadata_modified") - local_modified = self.local_metadata.get("metadata_modified") - return ckan_modified == local_modified - - def create( - self, - if_exists: str = "raise", - columns: list = None, - partition_columns: list = None, - force_columns: bool = False, - table_only: bool = True, - ) -> Metadata: - """Create metadata file based on the current version saved to CKAN database - - Args: - if_exists (str): Optional. What to do if config exists - * raise : Raises Conflict exception - * replace : Replaces config file with most recent - * pass : Do nothing - columns (list): Optional. - A `list` with the table columns' names. - partition_columns(list): Optional. - A `list` with the name of the table columns that partition the - data. - force_columns (bool): Optional. - If set to `True`, overwrite CKAN's columns with the ones provi - ded. - If set to `False`, keep CKAN's columns instead of the ones pro - vided. - table_only (bool): Optional. If set to `True`, only `table_config. - yaml` is created, even if there is no `dataset_config.yaml` fo - r the correspondent dataset metadata. If set to `False`, both - files are created if `dataset_config.yaml` doesn't exist yet. - Defaults to `True`. - - Returns: - Metadata: An instance of the `Metadata` class. - - Raises: - FileExistsError: If the correspodent YAML configuration file alrea - dy exists and `if_exists` is set to `"raise"`. - """ - - # see: https://docs.python.org/3/reference/compound_stmts.html#function-definitions - columns = [] if columns is None else columns - partition_columns = [] if partition_columns is None else partition_columns - - if self.filepath.exists() and if_exists == "raise": - raise FileExistsError( - f"{self.filepath} already exists." - + " Set the arg `if_exists` to `replace` to replace it." 
- ) - if if_exists != "pass": - ckan_metadata = self.ckan_metadata - - # Add local columns if - # 1. columns is empty and - # 2. force_columns is True - - # TODO: Is this sufficient to add columns? - if self.table_id and (force_columns or not ckan_metadata.get("columns")): - ckan_metadata["columns"] = [{"name": c} for c in columns] - - yaml_obj = build_yaml_object( - dataset_id=self.dataset_id, - table_id=self.table_id, - config=self.config, - schema=self.metadata_schema, - metadata=ckan_metadata, - columns_schema=self.columns_schema, - partition_columns=partition_columns, - ) - - self.filepath.parent.mkdir(parents=True, exist_ok=True) - - with open(self.filepath, "w", encoding="utf-8") as file: - ruamel = ryaml.YAML() - ruamel.preserve_quotes = True - ruamel.indent(mapping=4, sequence=6, offset=4) - ruamel.dump(yaml_obj, file) - - # if `dataset_config.yaml` doesn't exist but user wants to create - # it alongside `table_config.yaml` - dataset_config_exists = ( - self.metadata_path / self.dataset_id / "dataset_config.yaml" - ).exists() - if self.table_id and not table_only and not dataset_config_exists: - self.dataset_metadata_obj.create(if_exists=if_exists) - - logger.success( - " {object} {object_id} was {action}!", - object_id=self.table_id, - object="Metadata", - action="created", - ) - - return self - - def validate(self) -> bool: - """Validate dataset_config.yaml or table_config.yaml files. - The yaml file should be located at - metadata_path/dataset_id[/table_id/], - as defined in your config.toml - - Returns: - bool: - True if the metadata is valid. False if it is invalid. - - Raises: - BaseDosDadosException: - when the file has validation errors. 
- """ - - ckan = RemoteCKAN(self.CKAN_URL, user_agent="", apikey=None) - response = ckan.action.bd_dataset_validate(**self.ckan_data_dict) - - if response.get("errors"): - error = {self.ckan_data_dict.get("name"): response["errors"]} - message = f"{self.filepath} has validation errors: {error}" - raise BaseDosDadosException(message) - - logger.success( - " {object} {object_id} was {action}!", - object_id=self.table_id, - object="Metadata", - action="validated", - ) - - return True - - def publish( - self, - all: bool = False, - if_exists: str = "raise", - update_locally: bool = False, - ) -> dict: - """Publish local metadata modifications. - `Metadata.validate` is used to make sure no local invalid metadata is - published to CKAN. The `config.toml` `api_key` variable must be set - at the `[ckan]` section for this method to work. - - Args: - all (bool): Optional. If set to `True`, both `dataset_config.yaml` - and `table_config.yaml` are published for the given dataset_id - and table_id. - if_exists (str): Optional. What to do if config exists - * raise : Raises BaseDosDadosException if metadata already exi - sts in CKAN - * replace : Overwrite metadata in CKAN if it exists - * pass : Do nothing - update_locally (bool): Optional. If set to `True`, update the local - metadata with the one published to CKAN. - - Returns: - dict: - In case of success, a `dict` with the modified data - is returned. - - Raises: - BaseDosDadosException: - In case of CKAN's ValidationError or - NotAuthorized exceptions. - """ - - # alert user if they don't have an api_key set up yet - if not self.CKAN_API_KEY: - raise BaseDosDadosException( - "You can't use `Metadata.publish` without setting an `api_key`" - "in your ~/.basedosdados/config.toml. 
Please set it like this:" - '\n\n```\n[ckan]\nurl=""\napi_key=""\n```' - ) - - # check if metadata exists in CKAN and handle if_exists options - if self.exists_in_ckan(): - if if_exists == "raise": - raise BaseDosDadosException( - f"{self.dataset_id or self.table_id} already exists in CKAN." - f" Set the arg `if_exists` to `replace` to replace it." - ) - if if_exists == "pass": - return {} - - ckan = RemoteCKAN(self.CKAN_URL, user_agent="", apikey=self.CKAN_API_KEY) - - try: - self.validate() - - assert self.is_updated(), ( - f"Could not publish metadata due to out-of-date config file. " - f"Please run `basedosdados metadata create {self.dataset_id} " - f"{self.table_id or ''}` to get the most recently updated met" - f"adata and apply your changes to it." - ) - - data_dict = self.ckan_data_dict.copy() - - if self.table_id: - - # publish dataset metadata first if user wants to publish both - if all: - self.dataset_metadata_obj.publish(if_exists=if_exists) - - data_dict = data_dict["resources"][0] - - published = ckan.call_action( - action="resource_patch" - if self.exists_in_ckan() - else "resource_create", - data_dict=data_dict, - ) - - else: - data_dict["resources"] = [] - - published = ckan.call_action( - action="package_patch" - if self.exists_in_ckan() - else "package_create", - data_dict=data_dict, - ) - - # recreate local metadata YAML file with the published data - if published and update_locally: - self.create(if_exists="replace") - self.dataset_metadata_obj.create(if_exists="replace") - - logger.success( - " {object} {object_id} was {action}!", - object_id=data_dict, - object="Metadata", - action="published", - ) - - return published - - except (BaseDosDadosException, ValidationError) as e: - message = ( - f"Could not publish metadata due to a validation error. 
Pleas" - f"e see the traceback below to get information on how to corr" - f"ect it.\n\n{repr(e)}" - ) - raise BaseDosDadosException(message) from e - - except NotAuthorized as e: - message = ( - "Could not publish metadata due to an authorization error. Pl" - "ease check if you set the `api_key` at the `[ckan]` section " - "of your ~/.basedosdados/config.toml correctly. You must be a" - "n authorized user to publish modifications to a dataset or t" - "able's metadata." - ) - raise BaseDosDadosException(message) from e - - -############################################################################### -# Helper Functions -############################################################################### - - -def handle_data(k, data, local_default=None): - """Parse API's response data so that it is used in the YAML configuration - files. - - Args: - k (str): a key of the CKAN API's response metadata dictionary. - data (dict): a dictionary of metadata generated from the API. - local_default (Any): the default value of the given key in ca - se its value is set to `None` in CKAN. - - Returns: - list: a list of metadata values - """ - - # If no data is None then return a empty dict - data = data if data is not None else {} - # If no data is found for that key, uses local default - selected = data.get(k, local_default) - - # In some cases like `tags`, `groups`, `organization` - # the API default is to return a dict or list[dict] with all info. - # But, we just use `name` to build the yaml - _selected = deepcopy(selected) - - if _selected == []: - return _selected - - if not isinstance(_selected, list): - _selected = [_selected] - - if isinstance(_selected[0], dict): - if _selected[0].get("id") is not None: - return [s.get("name") for s in _selected] - - return selected - - -def handle_complex_fields(yaml_obj, k, properties, definitions, data): - """Parse complex fields and send each part of them to `handle_data`. 
- - Args: - yaml_obj (ruamel.yaml.CommentedMap): A YAML object with complex fields - . - k (str): The name of the key of the complex field. - properties (dict): A dictionary that contains the description of the c - omplex field. - definitions (dict): A dictionary with the schemas of the each component - of the complex field. - data (dict): A dictionary with the metadata of the complex field. - - Returns: - CommentedMap: A YAML object augmented with the complex field. - """ - - yaml_obj[k] = ryaml.CommentedMap() - - # Parsing 'allOf': [{'$ref': '#/definitions/PublishedBy'}] - # To get PublishedBy - d = properties[k]["allOf"][0]["$ref"].split("/")[-1] - if "properties" in definitions[d].keys(): - for dk, _ in definitions[d]["properties"].items(): - - yaml_obj[k][dk] = handle_data( - k=dk, - data=data.get(k, {}), - ) - - return yaml_obj - - -def add_yaml_property( - yaml: CommentedMap, - properties: dict = None, - definitions: dict = None, - metadata: dict = None, - goal=None, - has_column=False, -): - """Recursivelly adds properties to yaml to maintain order. - - Args: - yaml (CommentedMap): A YAML object with complex fields. - properties (dict): A dictionary that contains the description of the c - omplex field. - definitions (dict): A dictionary with the schemas of each complex fiel - d. - metadata (dict): A dictionary with the metadata to fill the YAML. - goal (str): The next key to be added to the YAML. - has_column (bool): If the goal is a column, no comments are written. 
- """ - - # see: https://docs.python.org/3/reference/compound_stmts.html#function-definitions - properties = {} if properties is None else properties - definitions = {} if definitions is None else definitions - metadata = {} if metadata is None else metadata - - # Looks for the key - # If goal is none has to look for id_before == None - for key, property in properties.items(): - goal_was_reached = key == goal - goal_was_reached |= property["yaml_order"]["id_before"] is None - - if goal_was_reached: - if "allOf" in property: - yaml = handle_complex_fields( - yaml_obj=yaml, - k=key, - properties=properties, - definitions=definitions, - data=metadata, - ) - - if yaml[key] == ordereddict(): - yaml[key] = handle_data(k=key, data=metadata) - else: - yaml[key] = handle_data(k=key, data=metadata) - - # Add comments - comment = None - if not has_column: - description = properties[key].get("description", []) - comment = "\n" + "".join(description) - yaml.yaml_set_comment_before_after_key(key, before=comment) - break - - # Return a ruaml object when property doesn't point to any other property - id_after = properties[key]["yaml_order"]["id_after"] - - if id_after is None: - return yaml - if id_after not in properties.keys(): - raise BaseDosDadosException( - f"Inconsistent YAML ordering: {id_after} is pointed to by {key}" - f" but doesn't have itself a `yaml_order` field in the JSON S" - f"chema." - ) - updated_props = deepcopy(properties) - updated_props.pop(key) - return add_yaml_property( - yaml=yaml, - properties=updated_props, - definitions=definitions, - metadata=metadata, - goal=id_after, - has_column=has_column, - ) - - -def build_yaml_object( - dataset_id: str, - table_id: str, - config: dict, - schema: dict, - metadata: dict = None, - columns_schema: dict = None, - partition_columns: list = None, -): - """Build a dataset_config.yaml or table_config.yaml - - Args: - dataset_id (str): The dataset id. - table_id (str): The table id. 
- config (dict): A dict with the `basedosdados` client configurations. - schema (dict): A dict with the JSON Schema of the dataset or table. - metadata (dict): A dict with the metadata of the dataset or table. - columns_schema (dict): A dict with the JSON Schema of the columns of - the table. - partition_columns (list): A list with the partition columns of the - table. - - Returns: - CommentedMap: A YAML object with the dataset or table metadata. - """ - - # see: https://docs.python.org/3/reference/compound_stmts.html#function-definitions - metadata = {} if metadata is None else metadata - columns_schema = {} if columns_schema is None else columns_schema - partition_columns = [] if partition_columns is None else partition_columns - - properties: dict = schema["properties"] - definitions: dict = schema["definitions"] - - # Drop all properties without yaml_order - properties = { - key: value for key, value in properties.items() if value.get("yaml_order") - } - - # Add properties - yaml = add_yaml_property( - yaml=ryaml.CommentedMap(), - properties=properties, - definitions=definitions, - metadata=metadata, - ) - - # Add columns - if metadata.get("columns"): - yaml["columns"] = [] - for metadatum in metadata.get("columns"): - properties = add_yaml_property( - yaml=ryaml.CommentedMap(), - properties=columns_schema["properties"], - definitions=columns_schema["definitions"], - metadata=metadatum, - has_column=True, - ) - yaml["columns"].append(properties) - - # Add partitions in case of new dataset/talbe or local overwriting - if partition_columns and partition_columns != ["[]"]: - yaml["partitions"] = "" - for local_column in partition_columns: - for remote_column in yaml["columns"]: - if remote_column["name"] == local_column: - remote_column["is_partition"] = True - yaml["partitions"] = partition_columns - - # Nullify `partitions` field in case of other-than-None empty values - if yaml.get("partitions") == "": - yaml["partitions"] = None - - if table_id: - # Add 
dataset_id and table_id - yaml["dataset_id"] = dataset_id - yaml["table_id"] = table_id - - # Add gcloud config variables - yaml["source_bucket_name"] = str(config.get("bucket_name")) - yaml["project_id_prod"] = str( - config.get("gcloud-projects", {}).get("prod", {}).get("name") - ) - yaml["project_id_staging"] = str( - config.get("gcloud-projects", {}).get("staging", {}).get("name") - ) - - return yaml diff --git a/python-package/basedosdados/upload/storage.py b/python-package/basedosdados/upload/storage.py index 40291ee4c..c924471bb 100644 --- a/python-package/basedosdados/upload/storage.py +++ b/python-package/basedosdados/upload/storage.py @@ -1,17 +1,18 @@ -''' +""" Class for managing the files in cloud storage. -''' +""" +import sys + # pylint: disable=invalid-name, too-many-arguments, undefined-loop-variable,line-too-long,broad-except,R0801 import time -from pathlib import Path -import sys import traceback +from pathlib import Path -from tqdm import tqdm from loguru import logger +from tqdm import tqdm -from basedosdados.upload.base import Base from basedosdados.exceptions import BaseDosDadosException +from basedosdados.upload.base import Base # google retryble exceptions. 
References: https://googleapis.dev/python/storage/latest/retry_timeout.html#module-google.cloud.storage.retry @@ -30,13 +31,10 @@ def __init__(self, dataset_id, table_id, **kwargs): @staticmethod def _resolve_partitions(partitions): - if isinstance(partitions, dict): - return "/".join(f"{k}={v}" for k, v in partitions.items()) + "/" if isinstance(partitions, str): - if partitions.endswith("/"): partitions = partitions[:-1] @@ -49,23 +47,24 @@ def _resolve_partitions(partitions): # check if it fits rule {b.split("=")[0]: b.split("=")[1] for b in partitions.split("/")} except IndexError as e: - raise Exception(f"The path {partitions} is not a valid partition") from e + raise Exception( + f"The path {partitions} is not a valid partition" + ) from e return partitions + "/" raise Exception(f"Partitions format or type not accepted: {partitions}") def _build_blob_name(self, filename, mode, partitions=None): - ''' + """ Builds the blob name. - ''' + """ # table folder blob_name = f"{mode}/{self.dataset_id}/{self.table_id}/" # add partition folder if partitions is not None: - blob_name += self._resolve_partitions(partitions) # add file name @@ -106,7 +105,6 @@ def init(self, replace=False, very_sure=False): self.client["storage_staging"].create_bucket(self.bucket) for folder in ["staging/", "raw/"]: - self.bucket.blob(folder).upload_from_string("") def upload( @@ -203,21 +201,17 @@ def upload( else [mode] ) for m in mode: - for filepath, part in tqdm(list(zip(paths, parts)), desc="Uploading files"): - blob_name = self._build_blob_name(filepath.name, m, part) blob = self.bucket.blob(blob_name, chunk_size=chunk_size) if not blob.exists() or if_exists == "replace": - upload_args["timeout"] = upload_args.get("timeout", None) blob.upload_from_filename(str(filepath), **upload_args) elif if_exists == "pass": - pass else: @@ -229,23 +223,22 @@ def upload( "to 'replace' to overwrite data." 
) - logger.success( - " {object} {filename}_{mode} was {action}!", - filename=filepath.name, - mode=mode, - object="File", - action="uploaded", - ) + logger.success( + " {object} {filename}_{mode} was {action}!", + filename=filepath.name, + mode=m, + object="File", + action="uploaded", + ) def download( self, filename="*", savepath=".", partitions=None, - mode="raw", + mode="staging", if_not_exists="raise", ): - """Download files from Google Storage from path `mode`/`dataset_id`/`table_id`/`partitions`/`filename` and replicate folder hierarchy on save, @@ -306,7 +299,6 @@ def download( # download all blobs matching the search to given savepath for blob in tqdm(blob_list, desc="Download Blob"): - # parse blob.name and get the csv file name csv_name = blob.name.split("/")[-1] @@ -314,11 +306,13 @@ def download( blob_folder = blob.name.replace(csv_name, "") # replicate folder hierarchy - (Path(savepath) / blob_folder).mkdir(parents=True, exist_ok=True) + savepath = Path(savepath) + (savepath / blob_folder).mkdir(parents=True, exist_ok=True) # download blob to savepath - savepath = f"{savepath}/{blob.name}" - blob.download_to_filename(filename=savepath) + save_file_path = savepath / blob.name + + blob.download_to_filename(filename=save_file_path) logger.success( " {object} {object_id}_{mode} was {action} at: {path}!", @@ -326,7 +320,7 @@ def download( mode=mode, object="File", action="downloaded", - path={str(savepath)} + path={str(savepath)}, ) def delete_file(self, filename, mode, partitions=None, not_found_ok=False): @@ -356,7 +350,6 @@ def delete_file(self, filename, mode, partitions=None, not_found_ok=False): ) for m in mode: - blob = self.bucket.blob(self._build_blob_name(filename, m, partitions)) if blob.exists() or not blob.exists() and not not_found_ok: @@ -391,7 +384,6 @@ def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False): prefix = f"{mode}/{self.dataset_id}/{self.table_id}/" if bucket_name is not None: - table_blobs = list( 
self.client["storage_staging"] .bucket(f"{bucket_name}") @@ -399,7 +391,6 @@ def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False): ) else: - table_blobs = list(self.bucket.list_blobs(prefix=prefix)) if not table_blobs: @@ -410,7 +401,7 @@ def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False): ) # Divides table_blobs list for maximum batch request size table_blobs_chunks = [ - table_blobs[i : i + 999] for i in range(0, len(table_blobs), 999) + table_blobs[i : i + 999] for i in range(0, len(table_blobs), 999) # noqa ] for i, source_table in enumerate( @@ -443,6 +434,7 @@ def copy_table( source_bucket_name="basedosdados", destination_bucket_name=None, mode="staging", + new_table_id=None, ): """Copies table from a source bucket to your bucket, sends request in batches. @@ -458,6 +450,8 @@ def copy_table( mode (str): Folder of which dataset to update [raw|staging|header|auxiliary_files|architecture] Folder of which dataset to update. Defaults to "staging". + new_table_id (str): Optional. + New table id to be copied to. If None, defaults to the table id initialized when instantiating the Storage object. 
""" source_table_ref = list( @@ -472,18 +466,17 @@ def copy_table( ) if destination_bucket_name is None: - destination_bucket = self.bucket else: - destination_bucket = self.client["storage_staging"].bucket( destination_bucket_name ) # Divides source_table_ref list for maximum batch request size source_table_ref_chunks = [ - source_table_ref[i : i + 999] for i in range(0, len(source_table_ref), 999) + source_table_ref[i : i + 999] # noqa + for i in range(0, len(source_table_ref), 999) # noqa ] for i, source_table in enumerate( @@ -494,9 +487,15 @@ def copy_table( try: with self.client["storage_staging"].batch(): for blob in source_table: + new_name = None + if new_table_id: + new_name = blob.name.replace( + self.table_id, new_table_id + ) self.bucket.copy_blob( blob, destination_bucket=destination_bucket, + new_name=new_name, ) break except Exception: @@ -507,8 +506,9 @@ def copy_table( time.sleep(5) traceback.print_exc(file=sys.stderr) logger.success( - " {object} {object_id}_{mode} was {action}!", + " {object} {object_id}_{mode} was {action} to {new_object_id}_{mode}!", object_id=self.table_id, + new_object_id=new_table_id if new_table_id else self.table_id, mode=mode, object="Table", action="copied", diff --git a/python-package/basedosdados/upload/table.py b/python-package/basedosdados/upload/table.py index 4389d1534..6150eb3dd 100644 --- a/python-package/basedosdados/upload/table.py +++ b/python-package/basedosdados/upload/table.py @@ -1,27 +1,27 @@ """ Class for manage tables in Storage and Big Query """ + +import contextlib +import inspect +import textwrap +from copy import deepcopy +from functools import lru_cache + # pylint: disable=invalid-name, too-many-locals, too-many-branches, too-many-arguments,line-too-long,R0801,consider-using-f-string from pathlib import Path -import json -from copy import deepcopy -import textwrap -import inspect -from io import StringIO -from loguru import logger -from google.cloud import bigquery -import ruamel.yaml as ryaml 
-import requests -import pandas as pd import google.api_core.exceptions +from google.cloud import bigquery +from google.cloud.bigquery import SchemaField +from loguru import logger +from basedosdados.exceptions import BaseDosDadosException from basedosdados.upload.base import Base -from basedosdados.upload.storage import Storage +from basedosdados.upload.connection import Connection from basedosdados.upload.dataset import Dataset from basedosdados.upload.datatypes import Datatype -from basedosdados.upload.metadata import Metadata -from basedosdados.exceptions import BaseDosDadosException +from basedosdados.upload.storage import Storage class Table(Base): @@ -34,126 +34,297 @@ def __init__(self, dataset_id, table_id, **kwargs): self.table_id = table_id.replace("-", "_") self.dataset_id = dataset_id.replace("-", "_") - self.dataset_folder = Path(self.metadata_path / self.dataset_id) - self.table_folder = self.dataset_folder / table_id self.table_full_name = dict( prod=f"{self.client['bigquery_prod'].project}.{self.dataset_id}.{self.table_id}", staging=f"{self.client['bigquery_staging'].project}.{self.dataset_id}_staging.{self.table_id}", ) self.table_full_name.update(dict(all=deepcopy(self.table_full_name))) - self.metadata = Metadata(self.dataset_id, self.table_id, **kwargs) @property + @lru_cache(256) def table_config(self): """ - Load table_config.yaml + Load table config """ - return self._load_yaml(self.table_folder / "table_config.yaml") + # return self._load_yaml(self.table_folder / "table_config.yaml") + return self.backend.get_table_config(self.dataset_id, self.table_id) def _get_table_obj(self, mode): """ Get table object from BigQuery """ + return self.client[f"bigquery_{mode}"].get_table(self.table_full_name[mode]) - def _is_partitioned(self): + def _is_partitioned( + self, data_sample_path=None, source_format=None, csv_delimiter=None + ): + if data_sample_path is not None: + table_columns = self._get_columns_from_data( + 
def _load_schema_from_json(
    self,
    columns=None,
):
    """Build a list of BigQuery ``SchemaField`` objects from column dicts.

    Args:
        columns (list[dict]): Optional. Column descriptions. Each entry must
            have a ``name`` and may have ``type`` and ``description``.
            ``None`` is treated as "no columns".

    Returns:
        list: One ``SchemaField`` per entry of ``columns``, in the same order.

    Raises:
        BaseDosDadosException: If any column has no ``name``.
    """
    schema = []

    # The declared default is None; iterating it directly raised TypeError.
    for col in columns or []:
        # ref: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.schema.SchemaField
        if col.get("name") is None:
            msg = "Columns must have a name! Check your data files for columns without name"
            raise BaseDosDadosException(msg)

        schema.append(
            SchemaField(
                name=col.get("name"),
                field_type=col.get("type"),
                description=col.get("description", None),
            )
        )
    return schema

def _load_staging_schema_from_data(
    self, data_sample_path=None, source_format="csv", csv_delimiter=","
):
    """Generate the staging schema from the columns found in a data sample.

    Logs a warning when the staging table already exists, then reads the
    columns with ``_get_columns_from_data`` and converts the non-partition
    columns with ``_load_schema_from_json``.

    Args:
        data_sample_path (str, pathlib.Path): Path to the data sample.
        source_format (str): Optional. Data source format. Defaults to 'csv'.
        csv_delimiter (str): Optional. CSV field separator. Defaults to ','.

    Returns:
        list: Schema fields for the sample's (non-partition) columns.
    """
    if self.table_exists(mode="staging"):
        logger.warning(
            " {object} {object_id} allready exists, replacing schema!",
            object_id=self.table_id,
            object="Table",
        )

    table_columns = self._get_columns_from_data(
        data_sample_path=data_sample_path,
        source_format=source_format,
        csv_delimiter=csv_delimiter,
        mode="staging",
    )

    # Only the regular columns are used here; partition columns are left out.
    return self._load_schema_from_json(columns=table_columns.get("columns"))

def _load_schema_from_bq(self, mode="staging"):
    """Load the schema from the columns of the existing BigQuery table.

    Args:
        mode (str): Which dataset to read the table from [prod|staging].

    Returns:
        list: Schema fields, partition columns first.
    """
    # Forward ``mode``: it used to be dropped, so the staging table was
    # always read no matter what the caller asked for.
    table_columns = self._get_columns_from_bq(mode=mode)
    columns = table_columns.get("partition_columns") + table_columns.get("columns")
    return self._load_schema_from_json(columns=columns)

def _load_schema_from_api(self, mode="staging"):
    """Load the schema from the columns metadata held in ``table_config``.

    Logs a warning when the table already exists in ``mode``.

    Args:
        mode (str): Which dataset to check for an existing table [prod|staging].

    Returns:
        list: Schema fields, partition columns first.
    """
    if self.table_exists(mode=mode):
        logger.warning(
            " {object} {object_id} allready exists, replacing schema!",
            object_id=self.table_id,
            object="Table",
        )

    table_columns = self._get_columns_metadata_from_api()
    columns = table_columns.get("partition_columns") + table_columns.get("columns")

    return self._load_schema_from_json(columns=columns)

def _get_columns_from_data(
    self,
    data_sample_path=None,
    source_format="csv",
    csv_delimiter=",",
    mode="staging",
):  # sourcery skip: low-code-quality
    """Get columns and partition columns from the structure of a data sample.

    When ``data_sample_path`` is a directory, the first file found under it
    with the ``source_format`` suffix is used as the sample, and every
    ``key=value`` folder in that file's path contributes a partition column.

    Args:
        data_sample_path (str, pathlib.Path): Data sample path used to
            read the column names.
        source_format (str): Optional. Data source format, used as the file
            suffix to look for. Defaults to 'csv'.
        csv_delimiter (str): Optional. CSV field separator. Defaults to ','.
        mode (str): Optional. Not used by this method.

    Returns:
        dict: ``{"columns": [...], "partition_columns": [...]}`` where every
        entry is ``{"name": <column>, "type": "STRING"}``.

    Raises:
        BaseDosDadosException: If no usable path is given, or the directory
            holds no file with the requested suffix.
    """
    # Without a path there is nothing to read the header from; previously
    # this surfaced as an UnboundLocalError on ``columns``.
    if not isinstance(data_sample_path, (str, Path)):
        raise BaseDosDadosException(
            "You must provide a data sample path (str or pathlib.Path) "
            "to read the columns from"
        )

    data_sample_path = Path(data_sample_path)
    partition_columns = []

    if data_sample_path.is_dir():
        sample_dir = data_sample_path
        # First matching file, as before, but with an explicit error
        # instead of a bare IndexError when nothing matches.
        data_sample_path = next(
            (
                f
                for f in sample_dir.glob("**/*")
                if f.is_file() and f.suffix == f".{source_format}"
            ),
            None,
        )
        if data_sample_path is None:
            raise BaseDosDadosException(
                f"No .{source_format} file was found under {sample_dir}"
            )

        partition_columns = [
            k.split("=")[0]
            for k in data_sample_path.as_posix().split("/")
            if "=" in k
        ]

    columns = Datatype(source_format=source_format).header(
        data_sample_path=data_sample_path, csv_delimiter=csv_delimiter
    )

    return {
        "columns": [{"name": col, "type": "STRING"} for col in columns],
        "partition_columns": [
            {"name": col, "type": "STRING"} for col in partition_columns
        ],
    }

def _get_columns_metadata_from_api(
    self,
):
    """Get columns and partition columns from the API metadata in ``table_config``.

    A column is a partition column when its ``isPartition`` is exactly
    ``True`` and a regular column when it is exactly ``False``; columns with
    any other value (or without the key) appear in neither list.

    Returns:
        dict: ``{"columns": [...], "partition_columns": [...]}`` where every
        entry has ``name``, ``type`` and ``description``. ``type`` is
        ``None`` when the column has no ``bigqueryType``.
    """

    def _as_column(col):
        # ``bigqueryType`` may be missing or None; do not crash on it.
        bigquery_type = col.get("bigqueryType") or {}
        return {
            "name": col.get("name"),
            "type": bigquery_type.get("name"),
            "description": col.get("descriptionPt"),
        }

    table_columns = self.table_config.get("columns") or []

    return {
        "columns": [
            _as_column(col)
            for col in table_columns
            if col.get("isPartition", {}) is False
        ],
        "partition_columns": [
            _as_column(col)
            for col in table_columns
            if col.get("isPartition", {}) is True
        ],
    }

def _parser_blobs_to_partition_dict(self) -> dict:
    """Extract the partition information from the staging blobs.

    Every ``key=value`` folder in a blob name under
    ``staging/<dataset_id>/<table_id>/`` adds ``value`` to the list kept
    for ``key``.

    Returns:
        dict: Partition key -> list of values seen (one per blob). Empty
        when the staging table does not exist or has no partitions.
    """

    if not self.table_exists(mode="staging"):
        return {}
    blobs = (
        self.client["storage_staging"]
        .bucket(self.bucket_name)
        .list_blobs(prefix=f"staging/{self.dataset_id}/{self.table_id}/")
    )
    partitions_dict = {}
    for blob in blobs:
        for folder in blob.name.split("/"):
            if "=" in folder:
                # Keep only the value; the whole ["key", "value"] list used
                # to be stored by mistake.
                key, value = folder.split("=", 1)
                partitions_dict.setdefault(key, []).append(value)
    return partitions_dict

def _get_columns_from_bq(self, mode="staging"):
    """Get columns and partition columns from the BigQuery table schema.

    Schema fields whose name is a partition key found in the staging blobs
    are reported as partition columns, the rest as regular columns.

    Args:
        mode (str): Which dataset to read the table from [prod|staging].

    Returns:
        dict: ``{"columns": [...], "partition_columns": [...]}`` where every
        entry has ``name``, ``type`` and ``description``.

    Raises:
        BaseDosDadosException: If the table does not exist in ``mode``.
    """
    if not self.table_exists(mode=mode):
        msg = f"Table {self.dataset_id}.{self.table_id} does not exist in {mode}, please create first!"
        # ``raise logger.error(msg)`` raised None, i.e. a TypeError; log
        # first and then raise a real exception.
        logger.error(msg)
        raise BaseDosDadosException(msg)

    schema = self._get_table_obj(mode=mode).schema

    partition_names = set(self._parser_blobs_to_partition_dict() or ())

    def _as_column(field):
        return {
            "name": field.name,
            "type": field.field_type,
            "description": field.description,
        }

    return {
        "columns": [
            _as_column(field) for field in schema if field.name not in partition_names
        ],
        "partition_columns": [
            _as_column(field) for field in schema if field.name in partition_names
        ],
    }

def _get_cross_columns_from_bq_api(self):
    """Merge the staging BigQuery columns with the API metadata.

    Starts from the staging table columns (partition columns first) and,
    for every column that also exists in the API metadata, takes ``type``
    and ``description`` from the API.

    Returns:
        list[dict]: Columns with ``name``, ``type`` and ``description``.
    """
    bq = self._get_columns_from_bq(mode="staging")
    bq_columns = bq.get("partition_columns") + bq.get("columns")

    api = self._get_columns_metadata_from_api()
    api_columns = api.get("partition_columns") + api.get("columns")

    # One lookup table instead of a nested scan; on duplicated names the
    # last API entry wins, as it did with the nested loops.
    api_by_name = {col.get("name"): col for col in api_columns}

    for bq_col in bq_columns:
        api_col = api_by_name.get(bq_col.get("name"))
        if api_col is not None:
            bq_col["type"] = api_col.get("type")
            bq_col["description"] = api_col.get("description")

    return bq_columns
@@ -175,36 +346,29 @@ def _make_publish_sql(self): */ """ + # table_columns = self._get_columns_from_api(mode="staging") + + columns = self._get_cross_columns_from_bq_api() + # remove triple quotes extra space publish_txt = inspect.cleandoc(publish_txt) publish_txt = textwrap.dedent(publish_txt) # add create table statement project_id_prod = self.client["bigquery_prod"].project - publish_txt += f"\n\nCREATE VIEW {project_id_prod}.{self.dataset_id}.{self.table_id} AS\nSELECT \n" + publish_txt += f"\n\nCREATE OR REPLACE VIEW {project_id_prod}.{self.dataset_id}.{self.table_id} AS\nSELECT \n" # sort columns by is_partition, partitions_columns come first - if self._is_partitioned(): - columns = sorted( - self.table_config["columns"], - key=lambda k: (k["is_partition"] is not None, k["is_partition"]), - reverse=True, - ) - else: - columns = self.table_config["columns"] - # add columns in publish.sql for col in columns: - name = col["name"] + name = col.get("name") bigquery_type = ( - "STRING" - if col["bigquery_type"] is None - else col["bigquery_type"].upper() + "STRING" if col.get("type") is None else col.get("type").upper() ) publish_txt += f"SAFE_CAST({name} AS {bigquery_type}) {name},\n" - ## remove last comma + # remove last comma publish_txt = publish_txt[:-2] + "\n" # add from statement @@ -213,35 +377,7 @@ def _make_publish_sql(self): f"FROM {project_id_staging}.{self.dataset_id}_staging.{self.table_id} AS t" ) - # save publish.sql in table_folder - (self.table_folder / "publish.sql").open("w", encoding="utf-8").write( - publish_txt - ) - - def _make_template(self, columns, partition_columns, if_table_config_exists, force_columns): - # create table_config.yaml with metadata - self.metadata.create( - if_exists=if_table_config_exists, - columns=partition_columns + columns, - partition_columns=partition_columns, - force_columns=force_columns, - table_only=False, - ) - - self._make_publish_sql() - - @staticmethod - def _sheet_to_df(columns_config_url_or_path): - """ 
- Convert sheet to dataframe - """ - url = columns_config_url_or_path.replace("edit#gid=", "export?format=csv&gid=") - try: - return pd.read_csv(StringIO(requests.get(url, timeout=10).content.decode("utf-8"))) - except Exception as e: - raise BaseDosDadosException( - "Check if your google sheet Share are: Anyone on the internet with this link can view" - ) from e + return publish_txt def table_exists(self, mode): """Check if table exists in BigQuery. @@ -257,385 +393,169 @@ def table_exists(self, mode): return bool(ref) - def update_columns(self, columns_config_url_or_path=None): - """ - Fills columns in table_config.yaml automatically using a public google sheets URL or a local file. Also regenerate - publish.sql and autofill type using bigquery_type. - - The sheet must contain the columns: - - name: column name - - description: column description - - bigquery_type: column bigquery type - - measurement_unit: column mesurement unit - - covered_by_dictionary: column related dictionary - - directory_column: column related directory in the format .: - - temporal_coverage: column temporal coverage - - has_sensitive_data: the column has sensitive data - - observations: column observations - Args: - columns_config_url_or_path (str): Path to the local architeture file or a public google sheets URL. - Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats. - Google sheets URL must be in the format https://docs.google.com/spreadsheets/d//edit#gid=. 
def _get_biglake_connection(
    self, set_biglake_connection_permissions=True, location=None, mode="staging"
):
    """Return the BigLake connection, creating it when it does not exist.

    Args:
        set_biglake_connection_permissions (bool): Optional. If `True`,
            calls ``set_biglake_permissions`` on the connection.
        location (str): Optional. Location passed to the connection.
        mode (str): Optional. Mode passed to the connection. Defaults to
            'staging'.

    Returns:
        Connection: The BigLake connection.

    Raises:
        BaseDosDadosException: If creating the connection or setting its
            permissions fails. The original error is chained as the cause.
    """
    # ``mode`` used to be ignored here (the literal "staging" was passed).
    connection = Connection(name="biglake", location=location, mode=mode)

    if not connection.exists:
        contact_admin = "Please contact an admin to create one for you."
        try:
            logger.info("Creating BigLake connection...")
            connection.create()
            logger.success("BigLake connection created!")
        except google.api_core.exceptions.Forbidden as exc:
            msg = (
                "You don't have permission to create a BigLake connection. "
                + contact_admin
            )
            logger.error(msg)
            raise BaseDosDadosException(msg) from exc
        except Exception as exc:
            msg = (
                "Something went wrong while creating the BigLake connection. "
                + contact_admin
            )
            logger.error(msg)
            raise BaseDosDadosException(msg) from exc

    def _permissions_error(summary):
        # Built only inside the handlers because it reads
        # ``connection.service_account``.
        return (
            f"{summary} "
            "Please make sure you have permissions to grant roles/storage.objectViewer"
            f" to the BigLake service account. ({connection.service_account})."
            " If you don't, please ask an admin to do it for you or set "
            "set_biglake_connection_permissions=False."
        )

    if set_biglake_connection_permissions:
        try:
            logger.info("Setting permissions for BigLake service account...")
            connection.set_biglake_permissions()
            logger.success("Permissions set successfully!")
        except google.api_core.exceptions.Forbidden as exc:
            msg = _permissions_error(
                "Could not set permissions for BigLake service account."
            )
            logger.error(msg)
            raise BaseDosDadosException(msg) from exc
        except Exception as exc:
            msg = _permissions_error(
                "Something went wrong while setting permissions for BigLake service account."
            )
            logger.error(msg)
            raise BaseDosDadosException(msg) from exc

    return connection

def _get_table_description(self, mode="staging"):
    """Return the description to set on the BigQuery table.

    Args:
        mode (str): Which table the description is for [prod|staging].

    Returns:
        str: For 'staging', a fixed text pointing at the prod table. For any
        other mode, ``descriptionPt`` from ``table_config`` (empty string
        when the key is absent), or a fallback text when reading the
        config fails.
    """
    table_path = self.table_full_name["prod"]
    if mode == "staging":
        return f"staging table for `{table_path}`"

    try:
        return self.table_config.get("descriptionPt", "")
    except Exception:  # pylint: disable=broad-except
        # Was ``BaseException``, which also swallowed KeyboardInterrupt
        # and SystemExit.
        logger.warning(
            f"table {self.table_id} does not have a description in the API."
        )
        return "description not available in the API."
- If you add a path, it automatically saves the data in the storage, - creates a datasets folder and BigQuery location, besides creating the - table and its configuration files. + If a path is provided, data is automatically saved in storage, + and a datasets folder and BigQuery location are created, in addition to creating + the table and its configuration files. - The new table should be located at `_staging.` in BigQuery. + The new table is located at `_staging.` in BigQuery. - It looks for data saved in Storage at `/staging///*` - and builds the table. + Data can be found in Storage at `/staging///*` + and is used to build the table. - It currently supports the types: + The following data types are supported: - - Comma Delimited CSV + - Comma-Delimited CSV - Apache Avro - Apache Parquet - Data can also be partitioned following the hive partitioning scheme - `=/=` - for instance, - `year=2012/country=BR`. The partition is automatcally detected - by searching for `partitions` on the `table_config.yaml`. + Data can also be partitioned following the Hive partitioning scheme + `=/=`; for example, + `year=2012/country=BR`. The partition is automatically detected by searching for `partitions` + in the `table_config.yaml` file. Args: - path (str or pathlib.PosixPath): Where to find the file that you want to upload to create a table with - job_config_params (dict): Optional. - Job configuration params from bigquery - if_table_exists (str): Optional - What to do if table exists - - * 'raise' : Raises Conflict exception - * 'replace' : Replace table - * 'pass' : Do nothing - force_dataset (bool): Creates `` folder and BigQuery Dataset if it doesn't exists. - if_table_config_exists (str): Optional. - What to do if config files already exist - - * 'raise': Raises FileExistError - * 'replace': Replace with blank template - * 'pass'; Do nothing - if_storage_data_exists (str): Optional. 
- What to do if data already exists on your bucket: - - * 'raise' : Raises Conflict exception - * 'replace' : Replace table - * 'pass' : Do nothing - source_format (str): Optional - Data source format. Only 'csv', 'avro' and 'parquet' + path (str or pathlib.PosixPath): The path to the file to be uploaded to create the table. + source_format (str): Optional. The format of the data source. Only 'csv', 'avro', and 'parquet' are supported. Defaults to 'csv'. - force_columns (bool): Optional. - If set to `True`, overwrite CKAN's columns with the ones provi - ded. - If set to `False`, keep CKAN's columns instead of the ones pro - vided. - columns_config_url_or_path (str): Path to the local architeture file or a public google sheets URL. - Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats. - Google sheets URL must be in the format https://docs.google.com/spreadsheets/d//edit#gid=. - - dataset_is_public (bool): Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public. - - location (str): Optional. Location of dataset data. - List of possible region names locations: https://cloud.google.com/bigquery/docs/locations - - chunk_size (int): Optional - The size of a chunk of data whenever iterating (in bytes). - This must be a multiple of 256 KB per the API specification. + csv_delimiter (str): Optional. + The separator for fields in a CSV file. The separator can be any ISO-8859-1 + single-byte character. Defaults to ','. + csv_skip_leading_rows(int): Optional. + The number of rows at the top of a CSV file that BigQuery will skip when loading the data. + Defaults to 1. + csv_allow_jagged_rows (bool): Optional. + Indicates if BigQuery should allow extra values that are not represented in the table schema. + Defaults to False. + if_table_exists (str): Optional. 
Determines what to do if the table already exists: + + * 'raise' : Raises a Conflict exception + * 'replace' : Replaces the table + * 'pass' : Does nothing + if_storage_data_exists (str): Optional. Determines what to do if the data already exists on your bucket: + + * 'raise' : Raises a Conflict exception + * 'replace' : Replaces the table + * 'pass' : Does nothing + if_dataset_exists (str): Optional. Determines what to do if the dataset already exists: + + * 'raise' : Raises a Conflict exception + * 'replace' : Replaces the dataset + * 'pass' : Does nothing + dataset_is_public (bool): Optional. Controls if the prod dataset is public or not. By default, staging datasets like `dataset_id_staging` are not public. + location (str): Optional. The location of the dataset data. List of possible region names locations: https://cloud.google.com/bigquery/docs/locations + chunk_size (int): Optional. The size of a chunk of data whenever iterating (in bytes). This must be a multiple of 256 KB per the API specification. If not specified, the chunk_size of the blob itself is used. If that is not specified, a default value of 40 MB is used. + biglake_table (bool): Optional. Sets this as a BigLake table. BigLake tables allow end-users to query from external data (such as GCS) even if + they don't have access to the source data. IAM is managed like any other BigQuery native table. See https://cloud.google.com/bigquery/docs/biglake-intro for more on BigLake. + set_biglake_connection_permissions (bool): Optional. If set to `True`, attempts to grant the BigLake connection service account access to the table's data in GCS. 
+ """ if path is None: - # Look if table data already exists at Storage data = self.client["storage_staging"].list_blobs( self.bucket_name, prefix=f"staging/{self.dataset_id}/{self.table_id}" @@ -655,127 +575,152 @@ def create( Path, ), ): - - Storage(self.dataset_id, self.table_id, **self.main_vars).upload( - path, + Storage(self.dataset_id, self.table_id).upload( + path=path, mode="staging", if_exists=if_storage_data_exists, chunk_size=chunk_size, ) # Create Dataset if it doesn't exist - if force_dataset: - dataset_obj = Dataset(self.dataset_id, **self.main_vars) + dataset_obj = Dataset( + self.dataset_id, + ) - try: - dataset_obj.init() - except FileExistsError: - pass + dataset_obj.create( + if_exists=if_dataset_exists, + mode="all", + location=location, + dataset_is_public=dataset_is_public, + ) - dataset_obj.create( - if_exists="pass", location=location, dataset_is_public=dataset_is_public + if biglake_table: + biglake_connection = self._get_biglake_connection( + set_biglake_connection_permissions=set_biglake_connection_permissions, + location=location, + mode="staging", ) - - self.init( - data_sample_path=path, - if_folder_exists="replace", - if_table_config_exists=if_table_config_exists, - columns_config_url_or_path=columns_config_url_or_path, - source_format=source_format, - force_columns=force_columns - ) + biglake_connection_id = biglake_connection.connection_id table = bigquery.Table(self.table_full_name["staging"]) + + table.description = self._get_table_description(mode="staging") + table.external_data_configuration = Datatype( - self, source_format, "staging", partitioned=self._is_partitioned() + dataset_id=self.dataset_id, + table_id=self.table_id, + schema=self._load_staging_schema_from_data( + data_sample_path=path, + source_format=source_format, + csv_delimiter=csv_delimiter, + ), + source_format=source_format, + csv_skip_leading_rows=csv_skip_leading_rows, + csv_delimiter=csv_delimiter, + csv_allow_jagged_rows=csv_allow_jagged_rows, + 
mode="staging", + bucket_name=self.bucket_name, + partitioned=self._is_partitioned( + data_sample_path=path, + source_format=source_format, + csv_delimiter=csv_delimiter, + ), + biglake_connection_id=biglake_connection_id if biglake_table else None, ).external_config + # When using BigLake tables, schema must be provided to the `Table` object + if biglake_table: + table.schema = self._load_staging_schema_from_data( + data_sample_path=path, + source_format=source_format, + csv_delimiter=csv_delimiter, + ) + logger.info(f"Using BigLake connection {biglake_connection_id}") + # Lookup if table alreay exists table_ref = None - try: + with contextlib.suppress(google.api_core.exceptions.NotFound): table_ref = self.client["bigquery_staging"].get_table( self.table_full_name["staging"] ) - except google.api_core.exceptions.NotFound: - pass - if isinstance(table_ref, google.cloud.bigquery.table.Table): - if if_table_exists == "pass": - return None if if_table_exists == "raise": - raise FileExistsError( "Table already exists, choose replace if you want to overwrite it" ) - if if_table_exists == "replace": - + if if_table_exists == "replace" and self.table_exists(mode="staging"): self.delete(mode="staging") - self.client["bigquery_staging"].create_table(table) + try: + self.client["bigquery_staging"].create_table(table) + except google.api_core.exceptions.Forbidden as exc: + if biglake_table: + raise BaseDosDadosException( + "Permission denied. The service account used to create the BigLake connection" + " does not have permission to read data from the source bucket. Please grant" + f" the service account {biglake_connection.service_account} the Storage Object Viewer" + " (roles/storage.objectViewer) role on the source bucket (or on the project)." + " Or, you can try running this again with set_biglake_connection_permissions=True." + ) from exc + raise BaseDosDadosException( + "Something went wrong when creating the table. Please check the logs for more information." 
def update(self, mode="prod", custom_schema=None):
    """Updates BigQuery schema and description.

    Args:
        mode (str): Optional.
            Table of which table to update [prod]
        custom_schema (list[dict]): Optional.
            Columns (``name``, ``type``, ``description``) to use as the
            prod schema. When omitted, the schema is built from the
            staging table columns crossed with the API metadata.
    """

    self._check_mode(mode)

    table = self._get_table_obj(mode)

    # Ask for the description of the table being updated.
    # ``_get_table_description`` defaults to "staging", so calling it
    # without ``mode`` wrote the staging text onto the prod table.
    table.description = self._get_table_description(mode=mode)

    # when mode is staging the table schema already exists
    if mode == "prod":
        columns = (
            custom_schema
            if custom_schema is not None
            else self._get_cross_columns_from_bq_api()
        )
        table.schema = self._load_schema_from_json(columns)

    fields = ["description", "schema"]

    # NOTE(review): the prod client is used whatever ``mode`` is - confirm
    # this is intended before calling with mode="staging".
    self.client["bigquery_prod"].update_table(table, fields=fields)

    logger.success(
        " {object} {object_id} was {action} in {mode}!",
        object_id=self.table_id,
        mode=mode,
        object="Table",
        action="updated",
    )
Args: @@ -827,18 +782,17 @@ def delete(self, mode): if mode == "all": for m, n in self.table_full_name[mode].items(): self.client[f"bigquery_{m}"].delete_table(n, not_found_ok=True) - logger.info( - " {object} {object_id}_{mode} was {action}!", - object_id=self.table_id, - mode=mode, - object="Table", - action="deleted", - ) + logger.info( + " {object} {object_id}_{mode} was {action}!", + object_id=self.table_id, + mode=m, + object="Table", + action="deleted", + ) else: self.client[f"bigquery_{mode}"].delete_table( self.table_full_name[mode], not_found_ok=True ) - logger.info( " {object} {object_id}_{mode} was {action}!", object_id=self.table_id, @@ -882,7 +836,10 @@ def append( raise BaseDosDadosException( "You cannot append to a table that does not exist" ) - Storage(self.dataset_id, self.table_id, **self.main_vars).upload( + Storage( + self.dataset_id, + self.table_id, + ).upload( filepath, mode="staging", partitions=partitions, diff --git a/python-package/basedosdados/upload/utils.py b/python-package/basedosdados/upload/utils.py new file mode 100644 index 000000000..1f55f38dc --- /dev/null +++ b/python-package/basedosdados/upload/utils.py @@ -0,0 +1,92 @@ +""" Utilities functions for Upload sub-module""" +# pylint: disable=invalid-name,too-many-arguments,too-many-locals,too-many-branches,too-many-statements, protected-access,line-too-long +import os +from pathlib import Path +from typing import List + +import pandas as pd + + +def to_partitions(data: pd.DataFrame, partition_columns: List[str], savepath: str): + """Save data in to hive patitions schema, given a dataframe and a list of partition columns. + Args: + data (pandas.core.frame.DataFrame): Dataframe to be partitioned. + partition_columns (list): List of columns to be used as partitions. 
+ savepath (str, pathlib.PosixPath): folder path to save the partitions + Exemple: + data = { + "ano": [2020, 2021, 2020, 2021, 2020, 2021, 2021,2025], + "mes": [1, 2, 3, 4, 5, 6, 6,9], + "sigla_uf": ["SP", "SP", "RJ", "RJ", "PR", "PR", "PR","PR"], + "dado": ["a", "b", "c", "d", "e", "f", "g",'h'], + } + to_partitions( + data=pd.DataFrame(data), + partition_columns=['ano','mes','sigla_uf'], + savepath='partitions/' + ) + """ + + if isinstance(data, (pd.core.frame.DataFrame)): + savepath = Path(savepath) + + # create unique combinations between partition columns + unique_combinations = ( + data[partition_columns] + .drop_duplicates(subset=partition_columns) + .to_dict(orient="records") + ) + + for filter_combination in unique_combinations: + patitions_values = [ + f"{partition}={value}" + for partition, value in filter_combination.items() + ] + + # get filtered data + df_filter = data.loc[ + data[filter_combination.keys()] + .isin(filter_combination.values()) + .all(axis=1), + :, + ] + df_filter = df_filter.drop(columns=partition_columns) + + # create folder tree + filter_save_path = Path(savepath / "/".join(patitions_values)) + filter_save_path.mkdir(parents=True, exist_ok=True) + file_filter_save_path = Path(filter_save_path) / "data.csv" + + # append data to csv + df_filter.to_csv( + file_filter_save_path, + index=False, + mode="a", + header=not file_filter_save_path.exists(), + ) + else: + raise BaseException("Data need to be a pandas DataFrame") + + +def break_file(filepath: str, columns: list, chunksize: int = 1000000) -> None: + """ + Break a file into smaller files, given a list of columns to be used as a key. + Args: + filepath (str, pathlib.PosixPath): file path to be broken. + columns (list): list of columns to be used as a key. + chunksize (int): number of rows to be read at a time. 
+ Returns: + None + """ + reader = pd.read_csv(filepath, chunksize=chunksize, usecols=columns) + for i, chunk in enumerate(reader): + folder = os.path.dirname(filepath) + filename = os.path.basename(filepath) + subfolder = os.path.join(folder, filename.split(".")[0]) + # create subfolder + if not os.path.exists(subfolder): + os.makedirs(subfolder) + # save chunk + chunk.to_csv( + os.path.join(subfolder, f'{filename.split(".")[0]}_{i}.csv'), index=False + ) diff --git a/python-package/poetry.lock b/python-package/poetry.lock index 821453c29..3e3a95a2d 100644 --- a/python-package/poetry.lock +++ b/python-package/poetry.lock @@ -1,94 +1,341 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + +[[package]] +name = "attrs" +version = "23.1.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] + +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = true +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + 
+[[package]] +name = "black" +version = "23.7.0" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"}, + {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"}, + {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"}, + {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"}, + {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"}, + {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"}, + {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"}, + {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = 
"sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"}, + {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"}, + {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"}, + {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"}, + {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"}, + {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"}, + {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"}, + {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "boltons" +version = "21.0.0" +description = "When they're not builtins, they're boltons." 
+optional = false +python-versions = "*" +files = [ + {file = "boltons-21.0.0-py2.py3-none-any.whl", hash = "sha256:b9bb7b58b2b420bbe11a6025fdef6d3e5edc9f76a42fb467afe7ca212ef9948b"}, + {file = "boltons-21.0.0.tar.gz", hash = "sha256:65e70a79a731a7fe6e98592ecfb5ccf2115873d01dbc576079874629e5c90f13"}, +] + +[[package]] +name = "bracex" +version = "2.3.post1" +description = "Bash style brace expander." +optional = false +python-versions = ">=3.7" +files = [ + {file = "bracex-2.3.post1-py3-none-any.whl", hash = "sha256:351b7f20d56fb9ea91f9b9e9e7664db466eb234188c175fd943f8f755c807e73"}, + {file = "bracex-2.3.post1.tar.gz", hash = "sha256:e7b23fc8b2cd06d3dec0692baabecb249dda94e06a617901ff03a6c56fd71693"}, +] + [[package]] name = "cachetools" -version = "4.2.4" +version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "main" optional = false -python-versions = "~=3.5" +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, +] [[package]] name = "certifi" -version = "2021.10.8" +version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] [[package]] name = "charset-normalizer" -version = "2.0.12" +version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false -python-versions = ">=3.5.0" - -[package.extras] -unicode_backport = ["unicodedata2"] +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = 
"charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = 
"charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = 
"charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = 
"charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = 
"sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] [[package]] -name = "ckanapi" -version = "4.6" -description = "A command line interface and Python module for accessing the CKAN Action API" -category = "main" +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" optional = false -python-versions = "*" +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] [package.dependencies] -docopt = "*" -python-slugify = ">=1.0" -requests = "*" -setuptools = "*" -six = ">=1.9,<2.0" +colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] -name = "click" -version = "8.0.3" -description = "Composable command line interface toolkit" 
-category = "main" +name = "click-option-group" +version = "0.5.6" +description = "Option groups missing in Click" optional = false -python-versions = ">=3.6" +python-versions = ">=3.6,<4" +files = [ + {file = "click-option-group-0.5.6.tar.gz", hash = "sha256:97d06703873518cc5038509443742b25069a3c7562d1ea72ff08bfadde1ce777"}, + {file = "click_option_group-0.5.6-py3-none-any.whl", hash = "sha256:38a26d963ee3ad93332ddf782f9259c5bdfe405e73408d943ef5e7d0c3767ec7"}, +] [package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +Click = ">=7.0,<9" + +[package.extras] +docs = ["Pallets-Sphinx-Themes", "m2r2", "sphinx"] +tests = ["pytest"] +tests-cov = ["coverage", "coveralls", "pytest", "pytest-cov"] [[package]] name = "colorama" -version = "0.4.4" +version = "0.4.6" description = "Cross-platform colored terminal text." -category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] [[package]] name = "db-dtypes" -version = "1.0.0" +version = "1.1.1" description = "Pandas Data Types for SQL systems (BigQuery, Spanner)" -category = "main" optional = false -python-versions = ">=3.6, <3.11" +python-versions = ">=3.7" +files = [ + {file = "db-dtypes-1.1.1.tar.gz", hash = "sha256:ab485c85fef2454f3182427def0b0a3ab179b2871542787d33ba519d62078883"}, + {file = "db_dtypes-1.1.1-py2.py3-none-any.whl", hash = "sha256:23be34ea2bc91065447ecea4d5f107e46d1de223d152e69fa73673a62d5bd27d"}, +] [package.dependencies] -numpy = ">=1.16.6,<2.0dev" +numpy = ">=1.16.6" packaging = ">=17.0" -pandas = 
">=0.24.2,<2.0dev" -pyarrow = ">=3.0.0,<8.0dev" +pandas = ">=0.24.2" +pyarrow = ">=3.0.0" + +[[package]] +name = "defusedxml" +version = "0.7.1" +description = "XML bomb protection for Python stdlib modules" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, + {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, +] + +[[package]] +name = "distlib" +version = "0.3.7" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, +] [[package]] -name = "docopt" -version = "0.6.2" -description = "Pythonic argument parser, that will make you smile" -category = "main" +name = "face" +version = "22.0.0" +description = "A command-line application framework (and CLI parser). Friendly for users, full-featured for developers." 
optional = false python-versions = "*" +files = [ + {file = "face-22.0.0-py3-none-any.whl", hash = "sha256:344fe31562d0f6f444a45982418f3793d4b14f9abb98ccca1509d22e0a3e7e35"}, + {file = "face-22.0.0.tar.gz", hash = "sha256:d5d692f90bc8f5987b636e47e36384b9bbda499aaf0a77aa0b0bbe834c76923d"}, +] + +[package.dependencies] +boltons = ">=20.0.0" [[package]] name = "fastavro" -version = "1.4.10" +version = "1.5.4" description = "Fast read/write of AVRO files" -category = "main" -optional = false +optional = true python-versions = ">=3.7" +files = [ + {file = "fastavro-1.5.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:d316cc476b2b24ef06402b8bfa047f8f72a9d6df2de777bb30d9ededda7e3a02"}, + {file = "fastavro-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8459faec46e34f2dfeb9b70ee8c36e935e626cff8608d675724718987a5f9ce5"}, + {file = "fastavro-1.5.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd44636d7ff8365a57b88707b747371fffb676c8c1f68c0d423ec36623888668"}, + {file = "fastavro-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:2402428b26d3c08a58acfa723833e19fb75077872bcb2475a4c81195cdae6a5d"}, + {file = "fastavro-1.5.4-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:5afc14398f4191d1a807aa59d2fba5ed869b31343679ec43dbc289db0a8e35c5"}, + {file = "fastavro-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5217e9713a3ea03205532394fba4d743749155b04b10b12a12fc26d225b89792"}, + {file = "fastavro-1.5.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e93a5eecb28cc35d670c9c4df70223fa9bcd6d9ca21b38b1b7ae13ece60c7fb"}, + {file = "fastavro-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:1a2f2465efd0e7de557c4034e8d4d88a132750cfa51e1582362a1b3a1a9fa911"}, + {file = "fastavro-1.5.4-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f7d5bc76c03c692d9acea0e5d5baceec19e1c059b26cb8ae9f4481e842be95a5"}, + {file = 
"fastavro-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80fe920229ab1f40eccb1b4918481cdd8a20e5e7dce19308ab38b23732da8a70"}, + {file = "fastavro-1.5.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3d190aee86ab73caa1aa550eba850be2ca5dd29d814b38720f4e300184e01d5"}, + {file = "fastavro-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:b6c30299a49b11f42251cb81c8e15db67750642eac7ba5c194a5ee95c83ebb11"}, + {file = "fastavro-1.5.4-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:1f7685f3a3c38352abab432bad2f9f2229a0e5f5f8548831e887c30f8396f2e9"}, + {file = "fastavro-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd021ec850fd30020b7c4fa868466fb7f95450f1f06eac92bd2204cbd8e45fb8"}, + {file = "fastavro-1.5.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06a7b5602dfa032c92f20ca90b8bde88251573773e501bedf5e8b76b9feb14a3"}, + {file = "fastavro-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:18250aa2ab0f7a095b1865565cf9976ea4605c201129636e6defe24ec3ef112c"}, + {file = "fastavro-1.5.4.tar.gz", hash = "sha256:d86f72c966713fb699570a18f7960cf4110b069c70681d7538be8d671c9db7c8"}, +] [package.extras] codecs = ["lz4", "python-snappy", "zstandard"] @@ -96,60 +343,158 @@ lz4 = ["lz4"] snappy = ["python-snappy"] zstandard = ["zstandard"] +[[package]] +name = "filelock" +version = "3.12.2" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"}, + {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"}, +] + +[package.extras] +docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "flake8" +version = "6.1.0" +description = "the modular source code checker: pep8 pyflakes and co" +optional = false +python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.11.0,<2.12.0" +pyflakes = ">=3.1.0,<3.2.0" + +[[package]] +name = "glom" +version = "22.1.0" +description = "A declarative object transformer and formatter, for conglomerating nested data." 
+optional = false +python-versions = "*" +files = [ + {file = "glom-22.1.0-py2.py3-none-any.whl", hash = "sha256:5339da206bf3532e01a83a35aca202960ea885156986d190574b779598e9e772"}, + {file = "glom-22.1.0.tar.gz", hash = "sha256:1510c6587a8f9c64a246641b70033cbc5ebde99f02ad245693678038e821aeb5"}, +] + +[package.dependencies] +attrs = "*" +boltons = ">=19.3.0" +face = ">=20.1.0" + +[package.extras] +yaml = ["PyYAML"] + [[package]] name = "google-api-core" -version = "1.31.5" +version = "2.11.1" description = "Google API client core library" -category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" +python-versions = ">=3.7" +files = [ + {file = "google-api-core-2.11.1.tar.gz", hash = "sha256:25d29e05a0058ed5f19c61c0a78b1b53adea4d9364b464d014fbda941f6d1c9a"}, + {file = "google_api_core-2.11.1-py3-none-any.whl", hash = "sha256:d92a5a92dc36dd4f4b9ee4e55528a90e432b059f93aee6ad857f9de8cc7ae94a"}, +] [package.dependencies] -google-auth = ">=1.25.0,<2.0dev" -googleapis-common-protos = ">=1.6.0,<2.0dev" -grpcio = {version = ">=1.29.0,<2.0dev", optional = true, markers = "extra == \"grpc\""} -packaging = ">=14.3" -protobuf = {version = ">=3.12.0", markers = "python_version > \"3\""} -pytz = "*" -requests = ">=2.18.0,<3.0.0dev" -setuptools = ">=40.3.0" -six = ">=1.13.0" +google-auth = ">=2.14.1,<3.0.dev0" +googleapis-common-protos = ">=1.56.2,<2.0.dev0" +grpcio = [ + {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] +grpcio-status = [ + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || 
>4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +requests = ">=2.18.0,<3.0.0.dev0" [package.extras] -grpc = ["grpcio (>=1.29.0,<2.0dev)"] -grpcgcp = ["grpcio-gcp (>=0.2.2)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2)"] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] + +[[package]] +name = "google-api-python-client" +version = "2.97.0" +description = "Google API Client Library for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-api-python-client-2.97.0.tar.gz", hash = "sha256:48277291894876a1ca7ed4127e055e81f81e6343ced1b544a7200ae2c119dcd7"}, + {file = "google_api_python_client-2.97.0-py2.py3-none-any.whl", hash = "sha256:5215f4cd577753fc4192ccfbe0bb8b55d4bb5fd68fa6268ac5cf271b6305de31"}, +] + +[package.dependencies] +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0" +google-auth = ">=1.19.0,<3.0.0.dev0" +google-auth-httplib2 = ">=0.1.0" +httplib2 = ">=0.15.0,<1.dev0" +uritemplate = ">=3.0.1,<5" [[package]] name = "google-auth" -version = "1.35.0" +version = "2.22.0" description = "Google Authentication Library" -category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" +python-versions = ">=3.6" +files = [ + {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, + {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, +] [package.dependencies] -cachetools = ">=2.0.0,<5.0" +cachetools = ">=2.0.0,<6.0" pyasn1-modules = ">=0.2.1" -rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} -setuptools = ">=40.3.0" +rsa = ">=3.1.4,<5" six = ">=1.9.0" +urllib3 = "<2.0" [package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests 
(>=2.20.0,<3.0.0dev)"] -pyopenssl = ["pyopenssl (>=20.0.0)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-auth-httplib2" +version = "0.1.0" +description = "Google Authentication Library: httplib2 transport" +optional = false +python-versions = "*" +files = [ + {file = "google-auth-httplib2-0.1.0.tar.gz", hash = "sha256:a07c39fd632becacd3f07718dfd6021bf396978f03ad3ce4321d060015cc30ac"}, + {file = "google_auth_httplib2-0.1.0-py2.py3-none-any.whl", hash = "sha256:31e49c36c6b5643b57e82617cb3e021e3e1d2df9da63af67252c02fa9c1f4a10"}, +] + +[package.dependencies] +google-auth = "*" +httplib2 = ">=0.15.0" +six = "*" [[package]] name = "google-auth-oauthlib" -version = "0.5.1" +version = "1.0.0" description = "Google Authentication Library" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, + {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, +] [package.dependencies] -google-auth = ">=1.0.0" +google-auth = ">=2.15.0" requests-oauthlib = ">=0.7.0" [package.extras] @@ -157,98 +502,212 @@ tool = ["click (>=6.0.0)"] [[package]] name = "google-cloud-bigquery" -version = "2.30.1" +version = "3.11.4" description = "Google BigQuery API client library" -category = "main" optional = false -python-versions = ">=3.6, <3.11" +python-versions = ">=3.7" +files = [ + {file = "google-cloud-bigquery-3.11.4.tar.gz", hash = "sha256:697df117241a2283bcbb93b21e10badc14e51c9a90800d2a7e1a3e1c7d842974"}, + {file = "google_cloud_bigquery-3.11.4-py2.py3-none-any.whl", hash = 
"sha256:5fa7897743a0ed949ade25a0942fc9e7557d8fce307c6f8a76d1b604cf27f1b1"}, +] [package.dependencies] -google-api-core = {version = ">=1.29.0,<3.0.0dev", extras = ["grpc"]} -google-cloud-core = ">=1.4.1,<3.0.0dev" +google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" -grpcio = ">=1.38.1,<2.0dev" -packaging = ">=14.3" -proto-plus = ">=1.10.0" -protobuf = ">=3.12.0" +grpcio = [ + {version = ">=1.47.0,<2.0dev", markers = "python_version < \"3.11\""}, + {version = ">=1.49.1,<2.0dev", markers = "python_version >= \"3.11\""}, +] +packaging = ">=20.0.0" +proto-plus = ">=1.15.0,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" python-dateutil = ">=2.7.2,<3.0dev" -requests = ">=2.18.0,<3.0.0dev" +requests = ">=2.21.0,<3.0.0dev" [package.extras] -all = ["Shapely (>=1.6.0,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.38.1,<2.0dev)", "opentelemetry-api (>=0.11b0)", "opentelemetry-instrumentation (>=0.11b0)", "opentelemetry-sdk (>=0.11b0)", "pandas (>=0.24.2)", "pyarrow (>=3.0.0,<7.0dev)", "tqdm (>=4.7.4,<5.0.0dev)"] -bignumeric_type = ["pyarrow (>=3.0.0,<7.0dev)"] -bqstorage = ["google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.38.1,<2.0dev)", "pyarrow (>=3.0.0,<7.0dev)"] -geopandas = ["Shapely (>=1.6.0,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)"] -opentelemetry = ["opentelemetry-api (>=0.11b0)", "opentelemetry-instrumentation (>=0.11b0)", "opentelemetry-sdk (>=0.11b0)"] -pandas = ["pandas (>=0.24.2)", "pyarrow (>=3.0.0,<7.0dev)"] +all = ["Shapely (>=1.8.4,<2.0dev)", "db-dtypes (>=0.3.0,<2.0.0dev)", "geopandas (>=0.9.0,<1.0dev)", "google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", 
"ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)", "ipywidgets (>=7.7.0)", "opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)", "tqdm (>=4.7.4,<5.0.0dev)"] +bqstorage = ["google-cloud-bigquery-storage (>=2.6.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] +geopandas = ["Shapely (>=1.8.4,<2.0dev)", "geopandas (>=0.9.0,<1.0dev)"] +ipython = ["ipykernel (>=6.0.0)", "ipython (>=7.23.1,!=8.1.0)"] +ipywidgets = ["ipykernel (>=6.0.0)", "ipywidgets (>=7.7.0)"] +opentelemetry = ["opentelemetry-api (>=1.1.0)", "opentelemetry-instrumentation (>=0.20b0)", "opentelemetry-sdk (>=1.1.0)"] +pandas = ["db-dtypes (>=0.3.0,<2.0.0dev)", "pandas (>=1.1.0)", "pyarrow (>=3.0.0)"] tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] +[[package]] +name = "google-cloud-bigquery-connection" +version = "1.13.1" +description = "Google Cloud Bigquery Connection API client library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-cloud-bigquery-connection-1.13.1.tar.gz", hash = "sha256:ca188b12f3acf07718b23adc58a9480e19d80291ebb67497fc6da2fea90c1ae7"}, + {file = "google_cloud_bigquery_connection-1.13.1-py2.py3-none-any.whl", hash = "sha256:ba2e0bbecb94a8cb4af88ba4261bc9d9078acdf2ba1b608db51dcd6439ba4c07"}, +] + +[package.dependencies] +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" +proto-plus = [ + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, + {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" + [[package]] name = "google-cloud-bigquery-storage" -version = "1.1.0" -description = "BigQuery Storage API API client library" 
-category = "main" +version = "2.22.0" +description = "Google Cloud Bigquery Storage API client library" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" +python-versions = ">=3.7" +files = [ + {file = "google-cloud-bigquery-storage-2.22.0.tar.gz", hash = "sha256:f6d8c7b3ab9b574c66977fcee9d336e334ad1a3843a722be19123640e7808ea3"}, + {file = "google_cloud_bigquery_storage-2.22.0-py2.py3-none-any.whl", hash = "sha256:7f11b2ae590a5b3874fb6ddf705a66a070340db238f971cf7b53349eee9ca317"}, +] [package.dependencies] -google-api-core = {version = ">=1.14.0,<2.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +proto-plus = [ + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, + {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, +] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" [package.extras] fastavro = ["fastavro (>=0.21.2)"] -pandas = ["pandas (>=0.17.1)"] +pandas = ["pandas (>=0.21.1)"] pyarrow = ["pyarrow (>=0.15.0)"] [[package]] name = "google-cloud-core" -version = "2.2.3" +version = "2.3.3" description = "Google Cloud API client core library" -category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" +files = [ + {file = "google-cloud-core-2.3.3.tar.gz", hash = "sha256:37b80273c8d7eee1ae816b3a20ae43585ea50506cb0e60f3cf5be5f87f1373cb"}, + {file = "google_cloud_core-2.3.3-py2.py3-none-any.whl", hash = "sha256:fbd11cad3e98a7e5b0343dc07cb1039a5ffd7a5bb96e1f1e27cee4bda4a90863"}, +] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" [package.extras] -grpc = ["grpcio (>=1.8.2,<2.0dev)"] +grpc = ["grpcio (>=1.38.0,<2.0dev)"] [[package]] name = 
"google-cloud-storage" -version = "1.42.3" +version = "2.10.0" description = "Google Cloud Storage API client library" -category = "main" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" +python-versions = ">=3.7" +files = [ + {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, + {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, +] [package.dependencies] -google-api-core = {version = ">=1.29.0,<3.0dev", markers = "python_version >= \"3.6\""} -google-auth = {version = ">=1.25.0,<3.0dev", markers = "python_version >= \"3.6\""} -google-cloud-core = {version = ">=1.6.0,<3.0dev", markers = "python_version >= \"3.6\""} -google-resumable-media = {version = ">=1.3.0,<3.0dev", markers = "python_version >= \"3.6\""} -protobuf = {version = "*", markers = "python_version >= \"3.6\""} +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0dev" +google-cloud-core = ">=2.3.0,<3.0dev" +google-resumable-media = ">=2.3.2" requests = ">=2.18.0,<3.0.0dev" -six = "*" + +[package.extras] +protobuf = ["protobuf (<5.0.0dev)"] [[package]] name = "google-crc32c" -version = "1.3.0" +version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" -category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" +files = [ + {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, + {file = 
"google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, + {file = 
"google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, + 
{file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, + {file = 
"google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, +] [package.extras] testing = ["pytest"] [[package]] name = "google-resumable-media" -version = "2.3.2" +version = "2.5.0" description = "Utilities for Google Media Downloads and Resumable Uploads" -category = "main" optional = false -python-versions = ">= 3.6" +python-versions = ">= 3.7" +files = [ + {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, + {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, +] [package.dependencies] google-crc32c = ">=1.0,<2.0dev" @@ -259,109 +718,490 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] [[package]] name = "googleapis-common-protos" -version = "1.55.0" +version = "1.60.0" description = "Common protobufs used in Google APIs" -category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" +files = [ + {file = "googleapis-common-protos-1.60.0.tar.gz", hash = "sha256:e73ebb404098db405ba95d1e1ae0aa91c3e15a71da031a2eeb6b2e23e7bc3708"}, + {file = "googleapis_common_protos-1.60.0-py2.py3-none-any.whl", hash = "sha256:69f9bbcc6acde92cab2db95ce30a70bd2b81d20b12eff3f1aabaffcbe8a93918"}, +] + +[package.dependencies] +grpcio = {version = ">=1.44.0,<2.0.0.dev0", optional = true, markers = "extra == \"grpc\""} +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || 
>4.21.5,<5.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] + +[[package]] +name = "gql" +version = "3.4.1" +description = "GraphQL client for Python" +optional = true +python-versions = "*" +files = [ + {file = "gql-3.4.1-py2.py3-none-any.whl", hash = "sha256:315624ca0f4d571ef149d455033ebd35e45c1a13f18a059596aeddcea99135cf"}, + {file = "gql-3.4.1.tar.gz", hash = "sha256:11dc5d8715a827f2c2899593439a4f36449db4f0eafa5b1ea63948f8a2f8c545"}, +] [package.dependencies] -protobuf = ">=3.12.0" +backoff = ">=1.11.1,<3.0" +graphql-core = ">=3.2,<3.3" +yarl = ">=1.6,<2.0" [package.extras] -grpc = ["grpcio (>=1.0.0)"] +aiohttp = ["aiohttp (>=3.7.1,<3.9.0)"] +all = ["aiohttp (>=3.7.1,<3.9.0)", "botocore (>=1.21,<2)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26,<2)", "websockets (>=10,<11)", "websockets (>=9,<10)"] +botocore = ["botocore (>=1.21,<2)"] +dev = ["aiofiles", "aiohttp (>=3.7.1,<3.9.0)", "black (==22.3.0)", "botocore (>=1.21,<2)", "check-manifest (>=0.42,<1)", "flake8 (==3.8.1)", "isort (==4.3.21)", "mock (==4.0.2)", "mypy (==0.910)", "parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "sphinx (>=3.0.0,<4)", "sphinx-argparse (==0.2.5)", "sphinx-rtd-theme (>=0.4,<1)", "types-aiofiles", "types-mock", "types-requests", "urllib3 (>=1.26,<2)", "vcrpy (==4.0.2)", "websockets (>=10,<11)", "websockets (>=9,<10)"] +requests = ["requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26,<2)"] +test = ["aiofiles", "aiohttp (>=3.7.1,<3.9.0)", "botocore (>=1.21,<2)", "mock (==4.0.2)", "parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=0.9.1,<1)", "urllib3 (>=1.26,<2)", "vcrpy (==4.0.2)", "websockets (>=10,<11)", "websockets (>=9,<10)"] +test-no-transport = 
["aiofiles", "mock (==4.0.2)", "parse (==1.15.0)", "pytest (==6.2.5)", "pytest-asyncio (==0.16.0)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "vcrpy (==4.0.2)"] +websockets = ["websockets (>=10,<11)", "websockets (>=9,<10)"] + +[[package]] +name = "graphql-core" +version = "3.2.3" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." +optional = true +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-core-3.2.3.tar.gz", hash = "sha256:06d2aad0ac723e35b1cb47885d3e5c45e956a53bc1b209a9fc5369007fe46676"}, + {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, +] + +[[package]] +name = "grpc-google-iam-v1" +version = "0.12.6" +description = "IAM API client library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpc-google-iam-v1-0.12.6.tar.gz", hash = "sha256:2bc4b8fdf22115a65d751c9317329322602c39b7c86a289c9b72d228d960ef5f"}, + {file = "grpc_google_iam_v1-0.12.6-py2.py3-none-any.whl", hash = "sha256:5c10f3d8dc2d88678ab1a9b0cb5482735c5efee71e6c0cd59f872eef22913f5c"}, +] + +[package.dependencies] +googleapis-common-protos = {version = ">=1.56.0,<2.0.0dev", extras = ["grpc"]} +grpcio = ">=1.44.0,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" [[package]] name = "grpcio" -version = "1.44.0" +version = "1.57.0" description = "HTTP/2-based RPC framework" -category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, + {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, + {file = 
"grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, + {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, + {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, + {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, + {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, + {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, + {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, + {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, + {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, + {file = 
"grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, + {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, + {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, + {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, + {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, + {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, + {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, + {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, + {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, + {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = 
"sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, + {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, + {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, + {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, + {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, + {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, + {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, + {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, + {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, + {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, + {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, + {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, + {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.57.0)"] + +[[package]] +name = "grpcio-status" +version = "1.57.0" +description = "Status proto mapping for gRPC" optional = false python-versions = ">=3.6" +files = [ + {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"}, + {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"}, +] + +[package.dependencies] +googleapis-common-protos = ">=1.5.5" +grpcio = ">=1.57.0" +protobuf = ">=4.21.6" + +[[package]] +name = "httplib2" +version = "0.22.0" +description = "A comprehensive HTTP client library." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"}, + {file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"}, +] [package.dependencies] -six = ">=1.5.2" +pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""} + +[[package]] +name = "identify" +version = "2.5.27" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.27-py2.py3-none-any.whl", hash = "sha256:fdb527b2dfe24602809b2201e033c2a113d7bdf716db3ca8e3243f735dcecaba"}, + {file = "identify-2.5.27.tar.gz", hash = "sha256:287b75b04a0e22d727bc9a41f0d4f3c1bcada97490fa6eabb5b28f0e9097e733"}, +] [package.extras] -protobuf = ["grpcio-tools (>=1.44.0)"] +license = ["ukkonen"] [[package]] name = "idna" -version = "3.3" +version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] [[package]] -name = "importlib-metadata" -version = "4.11.3" -description = "Read metadata from Python packages" -category = "main" +name = "importlib-resources" +version = "6.0.1" +description = "Read resources from Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, + {file = "importlib_resources-6.0.1.tar.gz", hash = 
"sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, +] [package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} -zipp = ">=0.5" +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -docs = ["jaraco.packaging (>=9)", "rst.linker (>=1.9)", "sphinx"] -perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [[package]] -name = "jinja2" -version = "3.0.3" -description = "A very fast and expressive template engine." -category = "main" +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." 
optional = false -python-versions = ">=3.6" +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + +[[package]] +name = "jsonschema" +version = "4.19.0" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, + {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, +] [package.dependencies] -MarkupSafe = ">=2.0" +attrs = ">=22.2.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +jsonschema-specifications = ">=2023.03.6" +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" [package.extras] -i18n = ["Babel (>=2.7)"] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] + +[[package]] +name = "jsonschema-specifications" +version = "2023.7.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = 
"sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, + {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, +] + +[package.dependencies] +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +referencing = ">=0.28.0" [[package]] name = "loguru" -version = "0.6.0" +version = "0.7.0" description = "Python logging made (stupidly) simple" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "loguru-0.7.0-py3-none-any.whl", hash = "sha256:b93aa30099fa6860d4727f1b81f8718e965bb96253fa190fab2077aaad6d15d3"}, + {file = "loguru-0.7.0.tar.gz", hash = "sha256:1612053ced6ae84d7959dd7d5e431a0532642237ec21f7fd83ac73fe539e03e1"}, +] [package.dependencies] colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils (==0.16)", "flake8 (>=3.7.7)", "isort (>=5.1.1)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "tox (>=3.9.0)"] +dev = ["Sphinx (==5.3.0)", "colorama (==0.4.5)", "colorama (==0.4.6)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v0.990)", "pre-commit (==3.2.1)", "pytest (==6.1.2)", "pytest (==7.2.1)", "pytest-cov (==2.12.1)", "pytest-cov (==4.0.0)", "pytest-mypy-plugins (==1.10.1)", "pytest-mypy-plugins (==1.9.3)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.2.0)", "tox (==3.27.1)", "tox (==4.4.6)"] [[package]] -name = "markupsafe" -version = "2.1.1" -description = "Safely add untrusted strings to HTML/XML markup." -category = "main" +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" optional = false python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mslex" +version = "0.3.0" +description = "shlex for windows" +optional = false +python-versions = ">=3.5" +files = [ + {file = "mslex-0.3.0-py2.py3-none-any.whl", hash = "sha256:380cb14abf8fabf40e56df5c8b21a6d533dc5cbdcfe42406bbf08dda8f42e42a"}, + {file 
= "mslex-0.3.0.tar.gz", hash = "sha256:4a1ac3f25025cad78ad2fe499dd16d42759f7a3801645399cce5c404415daa97"}, +] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = true +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = 
"sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = 
"multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system 
extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" [[package]] name = "numpy" -version = "1.21.5" -description = "NumPy is the fundamental package for array computing with Python." 
-category = "main" +version = "1.24.4" +description = "Fundamental package for array computing in Python" optional = false -python-versions = ">=3.7,<3.11" +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = 
"numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] [[package]] name = "oauthlib" -version = "3.2.0" +version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] [package.extras] rsa = ["cryptography (>=3.0.0)"] @@ -370,55 +1210,104 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "packaging" -version = "21.3" +version = "23.1" description = "Core utilities for Python packages" -category = "main" optional = false -python-versions = ">=3.6" - -[package.dependencies] -pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] [[package]] name = "pandas" -version = "1.3.5" +version = "2.0.3" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false -python-versions = ">=3.7.1" +python-versions = ">=3.8" +files = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] [package.dependencies] numpy = [ - {version = ">=1.17.3", markers = "platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, - {version = ">=1.19.2", markers = "platform_machine == \"aarch64\" and python_version < \"3.10\""}, - {version = ">=1.20.0", markers = "platform_machine == \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, 
+ {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] -python-dateutil = ">=2.7.3" -pytz = ">=2017.3" +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" [package.extras] -test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat 
(>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] [[package]] name = "pandas-gbq" -version = "0.17.4" +version = "0.19.2" description = "Google BigQuery connector for pandas" -category = "main" optional = false -python-versions = ">=3.7, <3.11" +python-versions = ">=3.7" +files = [ + {file = "pandas-gbq-0.19.2.tar.gz", hash = "sha256:b0f7fa84a2be0fe767e33a008ca7e4ad9a9e3ac67255fd0a41fc19b503138447"}, + {file = "pandas_gbq-0.19.2-py2.py3-none-any.whl", hash = "sha256:0ef8da3e4088053a2bea069ed688992a44b52af67dadb97eee494b32a2147563"}, +] [package.dependencies] -db-dtypes = ">=0.3.1,<2.0.0" -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0" -google-auth-oauthlib = ">=0.0.1" -google-cloud-bigquery = ">=1.27.2,<2.4.0 || >=2.5.0,<4.0.0dev" -google-cloud-bigquery-storage = ">=1.1.0,<3.0.0dev" +db-dtypes = ">=1.0.4,<2.0.0" +google-api-core = ">=2.10.2,<3.0.0dev" +google-auth = ">=2.13.0" +google-auth-oauthlib = ">=0.7.0" +google-cloud-bigquery = ">=3.3.5,<4.0.0dev" +google-cloud-bigquery-storage = ">=2.16.2,<3.0.0dev" numpy = ">=1.16.6" -pandas = ">=0.24.2" -pyarrow = ">=3.0.0,<8.0dev" -pydata-google-auth = "*" +pandas = ">=1.1.4" +pyarrow = ">=3.0.0" +pydata-google-auth = ">=1.5.0" setuptools = "*" [package.extras] @@ -426,104 +1315,281 @@ tqdm = ["tqdm (>=4.23.0)"] [[package]] name = "pandavro" -version = "1.6.0" +version = "1.7.2" description = "The interface between Avro and pandas DataFrame" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +optional = true +python-versions = ">=3.6.1" +files = [ + {file = "pandavro-1.7.2-py3-none-any.whl", hash = "sha256:5b4a2fbc86fb2b102e5b2b24490084e4775a5ac546fc8981931abecf6bb4a34b"}, + {file = "pandavro-1.7.2.tar.gz", hash = "sha256:4f2b7b6823522f54e8bfe33c091fb29898349892b70634f46c928e6a42a76e69"}, +] 
[package.dependencies] -fastavro = ">=0.14.11" -numpy = ">=1.7.0" -pandas = ">=1.1.5" -six = ">=1.9" +fastavro = ">=1.5.1,<1.6.0" +numpy = ">=1.15.4" +pandas = ">=1.1" [package.extras] -tests = ["pytest"] +tests = ["pytest (==7.1.2)"] + +[[package]] +name = "pathspec" +version = "0.11.2" +description = "Utility library for gitignore style pattern matching of file paths." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pathspec-0.11.2-py3-none-any.whl", hash = "sha256:1d6ed233af05e679efb96b1851550ea95bbb64b7c490b0f5aa52996c11e92a20"}, + {file = "pathspec-0.11.2.tar.gz", hash = "sha256:e0d8d0ac2f12da61956eb2306b69f9469b42f4deb0f3cb6ed47b9cce9996ced3"}, +] + +[[package]] +name = "peewee" +version = "3.16.3" +description = "a little orm" +optional = false +python-versions = "*" +files = [ + {file = "peewee-3.16.3.tar.gz", hash = "sha256:12b30e931193bc37b11f7c2ac646e3f67125a8b1a543ad6ab37ad124c8df7d16"}, +] + +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] + +[[package]] +name = "platformdirs" +version = "3.10.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.10.0-py3-none-any.whl", hash = "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d"}, + {file = "platformdirs-3.10.0.tar.gz", hash = "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d"}, +] + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] + +[[package]] +name = "pre-commit" +version = "3.3.3" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pre_commit-3.3.3-py2.py3-none-any.whl", hash = "sha256:10badb65d6a38caff29703362271d7dca483d01da88f9d7e05d0b97171c136cb"}, + {file = "pre_commit-3.3.3.tar.gz", hash = "sha256:a2256f489cd913d575c145132ae196fe335da32d91a8294b7afe6622335dd023"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" [[package]] name = "proto-plus" -version = "1.20.3" +version = "1.22.3" description = "Beautiful, Pythonic protocol buffers." 
-category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "proto-plus-1.22.3.tar.gz", hash = "sha256:fdcd09713cbd42480740d2fe29c990f7fbd885a67efc328aa8be6ee3e9f76a6b"}, + {file = "proto_plus-1.22.3-py3-none-any.whl", hash = "sha256:a49cd903bc0b6ab41f76bf65510439d56ca76f868adf0274e738bfdd096894df"}, +] [package.dependencies] -protobuf = ">=3.19.0" +protobuf = ">=3.19.0,<5.0.0dev" [package.extras] -testing = ["google-api-core[grpc] (>=1.22.2)"] +testing = ["google-api-core[grpc] (>=1.31.5)"] [[package]] name = "protobuf" -version = "3.19.4" -description = "Protocol Buffers" -category = "main" +version = "4.24.1" +description = "" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" +files = [ + {file = "protobuf-4.24.1-cp310-abi3-win32.whl", hash = "sha256:d414199ca605eeb498adc4d2ba82aedc0379dca4a7c364ff9bc9a179aa28e71b"}, + {file = "protobuf-4.24.1-cp310-abi3-win_amd64.whl", hash = "sha256:5906c5e79ff50fe38b2d49d37db5874e3c8010826f2362f79996d83128a8ed9b"}, + {file = "protobuf-4.24.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:970c701ee16788d74f3de20938520d7a0aebc7e4fff37096a48804c80d2908cf"}, + {file = "protobuf-4.24.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:fc361148e902949dcb953bbcb148c99fe8f8854291ad01107e4120361849fd0e"}, + {file = "protobuf-4.24.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:5d32363d14aca6e5c9e9d5918ad8fb65b091b6df66740ae9de50ac3916055e43"}, + {file = "protobuf-4.24.1-cp37-cp37m-win32.whl", hash = "sha256:df015c47d6855b8efa0b9be706c70bf7f050a4d5ac6d37fb043fbd95157a0e25"}, + {file = "protobuf-4.24.1-cp37-cp37m-win_amd64.whl", hash = "sha256:d4af4fd9e9418e819be30f8df2a16e72fbad546a7576ac7f3653be92a6966d30"}, + {file = "protobuf-4.24.1-cp38-cp38-win32.whl", hash = "sha256:302e8752c760549ed4c7a508abc86b25d46553c81989343782809e1a062a2ef9"}, + {file = "protobuf-4.24.1-cp38-cp38-win_amd64.whl", hash = "sha256:06437f0d4bb0d5f29e3d392aba69600188d4be5ad1e0a3370e581a9bf75a3081"}, + 
{file = "protobuf-4.24.1-cp39-cp39-win32.whl", hash = "sha256:0b2b224e9541fe9f046dd7317d05f08769c332b7e4c54d93c7f0f372dedb0b1a"}, + {file = "protobuf-4.24.1-cp39-cp39-win_amd64.whl", hash = "sha256:bd39b9094a4cc003a1f911b847ab379f89059f478c0b611ba1215053e295132e"}, + {file = "protobuf-4.24.1-py3-none-any.whl", hash = "sha256:55dd644adc27d2a624339332755fe077c7f26971045b469ebb9732a69ce1f2ca"}, + {file = "protobuf-4.24.1.tar.gz", hash = "sha256:44837a5ed9c9418ad5d502f89f28ba102e9cd172b6668bc813f21716f9273348"}, +] [[package]] -name = "pyaml" -version = "20.4.0" -description = "PyYAML-based module to produce pretty and readable YAML-serialized data" -category = "main" +name = "psutil" +version = "5.9.5" +description = "Cross-platform lib for process and system monitoring in Python." optional = false -python-versions = "*" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"}, + {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"}, + {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"}, + {file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"}, + {file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"}, + {file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = 
"sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"}, + {file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = "sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"}, + {file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"}, + {file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"}, + {file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"}, +] -[package.dependencies] -PyYAML = "*" +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] [[package]] name = "pyarrow" -version = "6.0.0" +version = "13.0.0" description = "Python library for Apache Arrow" -category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" +files = [ + {file = "pyarrow-13.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:1afcc2c33f31f6fb25c92d50a86b7a9f076d38acbcb6f9e74349636109550148"}, + {file = "pyarrow-13.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70fa38cdc66b2fc1349a082987f2b499d51d072faaa6b600f71931150de2e0e3"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd57b13a6466822498238877892a9b287b0a58c2e81e4bdb0b596dbb151cbb73"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ce69f7bf01de2e2764e14df45b8404fc6f1a5ed9871e8e08a12169f87b7a26"}, + {file = 
"pyarrow-13.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:588f0d2da6cf1b1680974d63be09a6530fd1bd825dc87f76e162404779a157dc"}, + {file = "pyarrow-13.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6241afd72b628787b4abea39e238e3ff9f34165273fad306c7acf780dd850956"}, + {file = "pyarrow-13.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:fda7857e35993673fcda603c07d43889fca60a5b254052a462653f8656c64f44"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:aac0ae0146a9bfa5e12d87dda89d9ef7c57a96210b899459fc2f785303dcbb67"}, + {file = "pyarrow-13.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d7759994217c86c161c6a8060509cfdf782b952163569606bb373828afdd82e8"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868a073fd0ff6468ae7d869b5fc1f54de5c4255b37f44fb890385eb68b68f95d"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51be67e29f3cfcde263a113c28e96aa04362ed8229cb7c6e5f5c719003659d33"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:d1b4e7176443d12610874bb84d0060bf080f000ea9ed7c84b2801df851320295"}, + {file = "pyarrow-13.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:69b6f9a089d116a82c3ed819eea8fe67dae6105f0d81eaf0fdd5e60d0c6e0944"}, + {file = "pyarrow-13.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ab1268db81aeb241200e321e220e7cd769762f386f92f61b898352dd27e402ce"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:ee7490f0f3f16a6c38f8c680949551053c8194e68de5046e6c288e396dccee80"}, + {file = "pyarrow-13.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3ad79455c197a36eefbd90ad4aa832bece7f830a64396c15c61a0985e337287"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68fcd2dc1b7d9310b29a15949cdd0cb9bc34b6de767aff979ebf546020bf0ba0"}, + {file = 
"pyarrow-13.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc6fd330fd574c51d10638e63c0d00ab456498fc804c9d01f2a61b9264f2c5b2"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e66442e084979a97bb66939e18f7b8709e4ac5f887e636aba29486ffbf373763"}, + {file = "pyarrow-13.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:0f6eff839a9e40e9c5610d3ff8c5bdd2f10303408312caf4c8003285d0b49565"}, + {file = "pyarrow-13.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b30a27f1cddf5c6efcb67e598d7823a1e253d743d92ac32ec1eb4b6a1417867"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:09552dad5cf3de2dc0aba1c7c4b470754c69bd821f5faafc3d774bedc3b04bb7"}, + {file = "pyarrow-13.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3896ae6c205d73ad192d2fc1489cd0edfab9f12867c85b4c277af4d37383c18c"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6647444b21cb5e68b593b970b2a9a07748dd74ea457c7dadaa15fd469c48ada1"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47663efc9c395e31d09c6aacfa860f4473815ad6804311c5433f7085415d62a7"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:b9ba6b6d34bd2563345488cf444510588ea42ad5613df3b3509f48eb80250afd"}, + {file = "pyarrow-13.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:d00d374a5625beeb448a7fa23060df79adb596074beb3ddc1838adb647b6ef09"}, + {file = "pyarrow-13.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:c51afd87c35c8331b56f796eff954b9c7f8d4b7fef5903daf4e05fcf017d23a8"}, + {file = "pyarrow-13.0.0.tar.gz", hash = "sha256:83333726e83ed44b0ac94d8d7a21bbdee4a05029c3b1e8db58a863eec8fd8a33"}, +] [package.dependencies] numpy = ">=1.16.6" [[package]] name = "pyasn1" -version = "0.4.8" -description = "ASN.1 types and codecs" -category = "main" +version = "0.5.0" +description = "Pure-Python implementation of ASN.1 types and 
DER/BER/CER codecs (X.208)" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, + {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, +] [[package]] name = "pyasn1-modules" -version = "0.2.8" -description = "A collection of ASN.1-based protocols modules." -category = "main" +version = "0.3.0" +description = "A collection of ASN.1-based protocols modules" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, + {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, +] [package.dependencies] -pyasn1 = ">=0.4.6,<0.5.0" +pyasn1 = ">=0.4.6,<0.6.0" + +[[package]] +name = "pycodestyle" +version = "2.11.0" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.0-py2.py3-none-any.whl", hash = "sha256:5d1013ba8dc7895b548be5afb05740ca82454fd899971563d2ef625d090326f8"}, + {file = "pycodestyle-2.11.0.tar.gz", hash = "sha256:259bcc17857d8a8b3b4a2327324b79e5f020a13c16074670f9c8c8f872ea76d0"}, +] [[package]] name = "pydata-google-auth" -version = "1.4.0" +version = "1.8.2" description = "PyData helpers for authenticating to Google APIs" -category = "main" optional = false python-versions = "*" +files = [ + {file = "pydata-google-auth-1.8.2.tar.gz", hash = "sha256:547b6c0fbea657dcecd50887c5db8640ebec062a59a2b88e8ff8e53a04818303"}, + {file = "pydata_google_auth-1.8.2-py2.py3-none-any.whl", hash = "sha256:a9dce59af4a170ea60c4b2ebbc83ee1f74d34255a4f97b2469ae9a4a0dc98e99"}, +] 
[package.dependencies] google-auth = {version = ">=1.25.0,<3.0dev", markers = "python_version >= \"3.6\""} google-auth-oauthlib = {version = ">=0.4.0", markers = "python_version >= \"3.6\""} setuptools = "*" +[[package]] +name = "pyflakes" +version = "3.1.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, + {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, +] + +[[package]] +name = "pygments" +version = "2.16.1" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pygments-2.16.1-py3-none-any.whl", hash = "sha256:13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692"}, + {file = "Pygments-2.16.1.tar.gz", hash = "sha256:1daff0494820c69bc8941e407aa20f577374ee88364ee10a98fdbe0aece96e29"}, +] + +[package.extras] +plugins = ["importlib-metadata"] + [[package]] name = "pyparsing" -version = "3.0.7" -description = "Python parsing module" -category = "main" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.6" +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, + {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, +] [package.extras] diagrams = ["jinja2", "railroad-diagrams"] @@ -532,68 +1598,139 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" 
+files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] [package.dependencies] six = ">=1.5" [[package]] -name = "python-slugify" -version = "6.1.1" -description = "A Python slugify application that also handles Unicode" -category = "main" +name = "python-lsp-jsonrpc" +version = "1.0.0" +description = "JSON RPC 2.0 server library" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = "*" +files = [ + {file = "python-lsp-jsonrpc-1.0.0.tar.gz", hash = "sha256:7bec170733db628d3506ea3a5288ff76aa33c70215ed223abdb0d95e957660bd"}, + {file = "python_lsp_jsonrpc-1.0.0-py3-none-any.whl", hash = "sha256:079b143be64b0a378bdb21dff5e28a8c1393fe7e8a654ef068322d754e545fc7"}, +] [package.dependencies] -text-unidecode = ">=1.3" +ujson = ">=3.0.0" [package.extras] -unidecode = ["Unidecode (>=1.1.1)"] +test = ["coverage", "pycodestyle", "pyflakes", "pylint", "pytest", "pytest-cov"] [[package]] name = "pytz" -version = "2021.3" +version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" +files = [ + {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, + {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, +] [[package]] name = "pyyaml" -version = "6.0" +version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", 
hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "referencing" +version = "0.30.2" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = 
"referencing-0.30.2-py3-none-any.whl", hash = "sha256:449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf"}, + {file = "referencing-0.30.2.tar.gz", hash = "sha256:794ad8003c65938edcdbc027f1933215e0d0ccc0291e3ce20a4d87432b59efc0"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" [[package]] name = "requests" -version = "2.27.1" +version = "2.31.0" description = "Python HTTP for Humans." -category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} -urllib3 = ">=1.21.1,<1.27" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" [package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "requests-oauthlib" version = "1.3.1" description = "OAuthlib authentication support for Requests." 
-category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] [package.dependencies] oauthlib = ">=3.0.0" @@ -602,736 +1739,621 @@ requests = ">=2.0.0" [package.extras] rsa = ["oauthlib[signedtoken] (>=3.0.0)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "rich" +version = "13.5.2" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, + {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "rpds-py" +version = "0.9.2" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.8" +files = [ + {file = 
"rpds_py-0.9.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:ab6919a09c055c9b092798ce18c6c4adf49d24d4d9e43a92b257e3f2548231e7"}, + {file = "rpds_py-0.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d55777a80f78dd09410bd84ff8c95ee05519f41113b2df90a69622f5540c4f8b"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a216b26e5af0a8e265d4efd65d3bcec5fba6b26909014effe20cd302fd1138fa"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29cd8bfb2d716366a035913ced99188a79b623a3512292963d84d3e06e63b496"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44659b1f326214950a8204a248ca6199535e73a694be8d3e0e869f820767f12f"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:745f5a43fdd7d6d25a53ab1a99979e7f8ea419dfefebcab0a5a1e9095490ee5e"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a987578ac5214f18b99d1f2a3851cba5b09f4a689818a106c23dbad0dfeb760f"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bf4151acb541b6e895354f6ff9ac06995ad9e4175cbc6d30aaed08856558201f"}, + {file = "rpds_py-0.9.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:03421628f0dc10a4119d714a17f646e2837126a25ac7a256bdf7c3943400f67f"}, + {file = "rpds_py-0.9.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:13b602dc3e8dff3063734f02dcf05111e887f301fdda74151a93dbbc249930fe"}, + {file = "rpds_py-0.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fae5cb554b604b3f9e2c608241b5d8d303e410d7dfb6d397c335f983495ce7f6"}, + {file = "rpds_py-0.9.2-cp310-none-win32.whl", hash = "sha256:47c5f58a8e0c2c920cc7783113df2fc4ff12bf3a411d985012f145e9242a2764"}, + {file = "rpds_py-0.9.2-cp310-none-win_amd64.whl", hash = "sha256:4ea6b73c22d8182dff91155af018b11aac9ff7eca085750455c5990cb1cfae6e"}, + 
{file = "rpds_py-0.9.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:e564d2238512c5ef5e9d79338ab77f1cbbda6c2d541ad41b2af445fb200385e3"}, + {file = "rpds_py-0.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f411330a6376fb50e5b7a3e66894e4a39e60ca2e17dce258d53768fea06a37bd"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e7521f5af0233e89939ad626b15278c71b69dc1dfccaa7b97bd4cdf96536bb7"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8d3335c03100a073883857e91db9f2e0ef8a1cf42dc0369cbb9151c149dbbc1b"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d25b1c1096ef0447355f7293fbe9ad740f7c47ae032c2884113f8e87660d8f6e"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a5d3fbd02efd9cf6a8ffc2f17b53a33542f6b154e88dd7b42ef4a4c0700fdad"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5934e2833afeaf36bd1eadb57256239785f5af0220ed8d21c2896ec4d3a765f"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:095b460e117685867d45548fbd8598a8d9999227e9061ee7f012d9d264e6048d"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:91378d9f4151adc223d584489591dbb79f78814c0734a7c3bfa9c9e09978121c"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:24a81c177379300220e907e9b864107614b144f6c2a15ed5c3450e19cf536fae"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:de0b6eceb46141984671802d412568d22c6bacc9b230174f9e55fc72ef4f57de"}, + {file = "rpds_py-0.9.2-cp311-none-win32.whl", hash = "sha256:700375326ed641f3d9d32060a91513ad668bcb7e2cffb18415c399acb25de2ab"}, + {file = "rpds_py-0.9.2-cp311-none-win_amd64.whl", hash = 
"sha256:0766babfcf941db8607bdaf82569ec38107dbb03c7f0b72604a0b346b6eb3298"}, + {file = "rpds_py-0.9.2-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1440c291db3f98a914e1afd9d6541e8fc60b4c3aab1a9008d03da4651e67386"}, + {file = "rpds_py-0.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0f2996fbac8e0b77fd67102becb9229986396e051f33dbceada3debaacc7033f"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f30d205755566a25f2ae0382944fcae2f350500ae4df4e795efa9e850821d82"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:159fba751a1e6b1c69244e23ba6c28f879a8758a3e992ed056d86d74a194a0f3"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1f044792e1adcea82468a72310c66a7f08728d72a244730d14880cd1dabe36b"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9251eb8aa82e6cf88510530b29eef4fac825a2b709baf5b94a6094894f252387"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01899794b654e616c8625b194ddd1e5b51ef5b60ed61baa7a2d9c2ad7b2a4238"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0c43f8ae8f6be1d605b0465671124aa8d6a0e40f1fb81dcea28b7e3d87ca1e1"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:207f57c402d1f8712618f737356e4b6f35253b6d20a324d9a47cb9f38ee43a6b"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b52e7c5ae35b00566d244ffefba0f46bb6bec749a50412acf42b1c3f402e2c90"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:978fa96dbb005d599ec4fd9ed301b1cc45f1a8f7982d4793faf20b404b56677d"}, + {file = "rpds_py-0.9.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6aa8326a4a608e1c28da191edd7c924dff445251b94653988efb059b16577a4d"}, + {file = 
"rpds_py-0.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:aad51239bee6bff6823bbbdc8ad85136c6125542bbc609e035ab98ca1e32a192"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4bd4dc3602370679c2dfb818d9c97b1137d4dd412230cfecd3c66a1bf388a196"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dd9da77c6ec1f258387957b754f0df60766ac23ed698b61941ba9acccd3284d1"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:190ca6f55042ea4649ed19c9093a9be9d63cd8a97880106747d7147f88a49d18"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:876bf9ed62323bc7dcfc261dbc5572c996ef26fe6406b0ff985cbcf460fc8a4c"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa2818759aba55df50592ecbc95ebcdc99917fa7b55cc6796235b04193eb3c55"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ea4d00850ef1e917815e59b078ecb338f6a8efda23369677c54a5825dbebb55"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5855c85eb8b8a968a74dc7fb014c9166a05e7e7a8377fb91d78512900aadd13d"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:14c408e9d1a80dcb45c05a5149e5961aadb912fff42ca1dd9b68c0044904eb32"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:65a0583c43d9f22cb2130c7b110e695fff834fd5e832a776a107197e59a1898e"}, + {file = "rpds_py-0.9.2-cp38-none-win32.whl", hash = "sha256:71f2f7715935a61fa3e4ae91d91b67e571aeb5cb5d10331ab681256bda2ad920"}, + {file = "rpds_py-0.9.2-cp38-none-win_amd64.whl", hash = "sha256:674c704605092e3ebbbd13687b09c9f78c362a4bc710343efe37a91457123044"}, + {file = "rpds_py-0.9.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:07e2c54bef6838fa44c48dfbc8234e8e2466d851124b551fc4e07a1cfeb37260"}, + {file = 
"rpds_py-0.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7fdf55283ad38c33e35e2855565361f4bf0abd02470b8ab28d499c663bc5d7c"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:890ba852c16ace6ed9f90e8670f2c1c178d96510a21b06d2fa12d8783a905193"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50025635ba8b629a86d9d5474e650da304cb46bbb4d18690532dd79341467846"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:517cbf6e67ae3623c5127206489d69eb2bdb27239a3c3cc559350ef52a3bbf0b"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0836d71ca19071090d524739420a61580f3f894618d10b666cf3d9a1688355b1"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c439fd54b2b9053717cca3de9583be6584b384d88d045f97d409f0ca867d80f"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f68996a3b3dc9335037f82754f9cdbe3a95db42bde571d8c3be26cc6245f2324"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7d68dc8acded354c972116f59b5eb2e5864432948e098c19fe6994926d8e15c3"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f963c6b1218b96db85fc37a9f0851eaf8b9040aa46dec112611697a7023da535"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a46859d7f947061b4010e554ccd1791467d1b1759f2dc2ec9055fa239f1bc26"}, + {file = "rpds_py-0.9.2-cp39-none-win32.whl", hash = "sha256:e07e5dbf8a83c66783a9fe2d4566968ea8c161199680e8ad38d53e075df5f0d0"}, + {file = "rpds_py-0.9.2-cp39-none-win_amd64.whl", hash = "sha256:682726178138ea45a0766907957b60f3a1bf3acdf212436be9733f28b6c5af3c"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:196cb208825a8b9c8fc360dc0f87993b8b260038615230242bf18ec84447c08d"}, + {file = 
"rpds_py-0.9.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c7671d45530fcb6d5e22fd40c97e1e1e01965fc298cbda523bb640f3d923b387"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83b32f0940adec65099f3b1c215ef7f1d025d13ff947975a055989cb7fd019a4"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f67da97f5b9eac838b6980fc6da268622e91f8960e083a34533ca710bec8611"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03975db5f103997904c37e804e5f340c8fdabbb5883f26ee50a255d664eed58c"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:987b06d1cdb28f88a42e4fb8a87f094e43f3c435ed8e486533aea0bf2e53d931"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c861a7e4aef15ff91233751619ce3a3d2b9e5877e0fcd76f9ea4f6847183aa16"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:02938432352359805b6da099c9c95c8a0547fe4b274ce8f1a91677401bb9a45f"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ef1f08f2a924837e112cba2953e15aacfccbbfcd773b4b9b4723f8f2ddded08e"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:35da5cc5cb37c04c4ee03128ad59b8c3941a1e5cd398d78c37f716f32a9b7f67"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:141acb9d4ccc04e704e5992d35472f78c35af047fa0cfae2923835d153f091be"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:79f594919d2c1a0cc17d1988a6adaf9a2f000d2e1048f71f298b056b1018e872"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:a06418fe1155e72e16dddc68bb3780ae44cebb2912fbd8bb6ff9161de56e1798"}, + {file = 
"rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b2eb034c94b0b96d5eddb290b7b5198460e2d5d0c421751713953a9c4e47d10"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b08605d248b974eb02f40bdcd1a35d3924c83a2a5e8f5d0fa5af852c4d960af"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0805911caedfe2736935250be5008b261f10a729a303f676d3d5fea6900c96a"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab2299e3f92aa5417d5e16bb45bb4586171c1327568f638e8453c9f8d9e0f020"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c8d7594e38cf98d8a7df25b440f684b510cf4627fe038c297a87496d10a174f"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8b9ec12ad5f0a4625db34db7e0005be2632c1013b253a4a60e8302ad4d462afd"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1fcdee18fea97238ed17ab6478c66b2095e4ae7177e35fb71fbe561a27adf620"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:933a7d5cd4b84f959aedeb84f2030f0a01d63ae6cf256629af3081cf3e3426e8"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:686ba516e02db6d6f8c279d1641f7067ebb5dc58b1d0536c4aaebb7bf01cdc5d"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0173c0444bec0a3d7d848eaeca2d8bd32a1b43f3d3fde6617aac3731fa4be05f"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d576c3ef8c7b2d560e301eb33891d1944d965a4d7a2eacb6332eee8a71827db6"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed89861ee8c8c47d6beb742a602f912b1bb64f598b1e2f3d758948721d44d468"}, + {file = 
"rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1054a08e818f8e18910f1bee731583fe8f899b0a0a5044c6e680ceea34f93876"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99e7c4bb27ff1aab90dcc3e9d37ee5af0231ed98d99cb6f5250de28889a3d502"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c545d9d14d47be716495076b659db179206e3fd997769bc01e2d550eeb685596"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9039a11bca3c41be5a58282ed81ae422fa680409022b996032a43badef2a3752"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fb39aca7a64ad0c9490adfa719dbeeb87d13be137ca189d2564e596f8ba32c07"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2d8b3b3a2ce0eaa00c5bbbb60b6713e94e7e0becab7b3db6c5c77f979e8ed1f1"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:99b1c16f732b3a9971406fbfe18468592c5a3529585a45a35adbc1389a529a03"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c27ee01a6c3223025f4badd533bea5e87c988cb0ba2811b690395dfe16088cfe"}, + {file = "rpds_py-0.9.2.tar.gz", hash = "sha256:8d70e8f14900f2657c249ea4def963bed86a29b81f81f5b76b5a9215680de945"}, +] + [[package]] name = "rsa" -version = "4.8" +version = "4.9" description = "Pure-Python RSA implementation" -category = "main" optional = false python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] [package.dependencies] pyasn1 = ">=0.1.3" [[package]] -name = "ruamel.yaml" -version = "0.17.10" +name = "ruamel-yaml" +version = "0.17.32" description = 
"ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" -category = "main" optional = false python-versions = ">=3" +files = [ + {file = "ruamel.yaml-0.17.32-py3-none-any.whl", hash = "sha256:23cd2ed620231677564646b0c6a89d138b6822a0d78656df7abda5879ec4f447"}, + {file = "ruamel.yaml-0.17.32.tar.gz", hash = "sha256:ec939063761914e14542972a5cba6d33c23b0859ab6342f61cf070cfc600efc2"}, +] [package.dependencies] -"ruamel.yaml.clib" = {version = ">=0.1.2", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.10\""} +"ruamel.yaml.clib" = {version = ">=0.2.7", markers = "platform_python_implementation == \"CPython\" and python_version < \"3.12\""} [package.extras] docs = ["ryd"] jinja2 = ["ruamel.yaml.jinja2 (>=0.2)"] [[package]] -name = "ruamel.yaml.clib" -version = "0.2.6" +name = "ruamel-yaml-clib" +version = "0.2.7" description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5859983f26d8cd7bb5c287ef452e8aacc86501487634573d260968f753e1d71"}, + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:debc87a9516b237d0466a711b18b6ebeb17ba9f391eb7f91c649c5c4ec5006c7"}, + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:df5828871e6648db72d1c19b4bd24819b80a755c4541d3409f0f7acd0f335c80"}, + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:efa08d63ef03d079dcae1dfe334f6c8847ba8b645d08df286358b1f5293d24ab"}, + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win32.whl", hash = "sha256:763d65baa3b952479c4e972669f679fe490eee058d5aa85da483ebae2009d231"}, + {file = "ruamel.yaml.clib-0.2.7-cp310-cp310-win_amd64.whl", hash = 
"sha256:d000f258cf42fec2b1bbf2863c61d7b8918d31ffee905da62dede869254d3b8a"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:045e0626baf1c52e5527bd5db361bc83180faaba2ff586e763d3d5982a876a9e"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:1a6391a7cabb7641c32517539ca42cf84b87b667bad38b78d4d42dd23e957c81"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9c7617df90c1365638916b98cdd9be833d31d337dbcd722485597b43c4a215bf"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:41d0f1fa4c6830176eef5b276af04c89320ea616655d01327d5ce65e50575c94"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win32.whl", hash = "sha256:f6d3d39611ac2e4f62c3128a9eed45f19a6608670c5a2f4f07f24e8de3441d38"}, + {file = "ruamel.yaml.clib-0.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:da538167284de58a52109a9b89b8f6a53ff8437dd6dc26d33b57bf6699153122"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4b3a93bb9bc662fc1f99c5c3ea8e623d8b23ad22f861eb6fce9377ac07ad6072"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-macosx_12_0_arm64.whl", hash = "sha256:a234a20ae07e8469da311e182e70ef6b199d0fbeb6c6cc2901204dd87fb867e8"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:15910ef4f3e537eea7fe45f8a5d19997479940d9196f357152a09031c5be59f3"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:370445fd795706fd291ab00c9df38a0caed0f17a6fb46b0f607668ecb16ce763"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-win32.whl", hash = "sha256:ecdf1a604009bd35c674b9225a8fa609e0282d9b896c03dd441a91e5f53b534e"}, + {file = "ruamel.yaml.clib-0.2.7-cp36-cp36m-win_amd64.whl", hash = "sha256:f34019dced51047d6f70cb9383b2ae2853b7fc4dce65129a5acd49f4f9256646"}, + {file = 
"ruamel.yaml.clib-0.2.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aa261c29a5545adfef9296b7e33941f46aa5bbd21164228e833412af4c9c75f"}, + {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-macosx_12_0_arm64.whl", hash = "sha256:f01da5790e95815eb5a8a138508c01c758e5f5bc0ce4286c4f7028b8dd7ac3d0"}, + {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:40d030e2329ce5286d6b231b8726959ebbe0404c92f0a578c0e2482182e38282"}, + {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c3ca1fbba4ae962521e5eb66d72998b51f0f4d0f608d3c0347a48e1af262efa7"}, + {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-win32.whl", hash = "sha256:7bdb4c06b063f6fd55e472e201317a3bb6cdeeee5d5a38512ea5c01e1acbdd93"}, + {file = "ruamel.yaml.clib-0.2.7-cp37-cp37m-win_amd64.whl", hash = "sha256:be2a7ad8fd8f7442b24323d24ba0b56c51219513cfa45b9ada3b87b76c374d4b"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91a789b4aa0097b78c93e3dc4b40040ba55bef518f84a40d4442f713b4094acb"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:99e77daab5d13a48a4054803d052ff40780278240a902b880dd37a51ba01a307"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3243f48ecd450eddadc2d11b5feb08aca941b5cd98c9b1db14b2fd128be8c697"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8831a2cedcd0f0927f788c5bdf6567d9dc9cc235646a434986a852af1cb54b4b"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-win32.whl", hash = "sha256:3110a99e0f94a4a3470ff67fc20d3f96c25b13d24c6980ff841e82bafe827cac"}, + {file = "ruamel.yaml.clib-0.2.7-cp38-cp38-win_amd64.whl", hash = "sha256:92460ce908546ab69770b2e576e4f99fbb4ce6ab4b245345a3869a0a0410488f"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:5bc0667c1eb8f83a3752b71b9c4ba55ef7c7058ae57022dd9b29065186a113d9"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:4a4d8d417868d68b979076a9be6a38c676eca060785abaa6709c7b31593c35d1"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:bf9a6bc4a0221538b1a7de3ed7bca4c93c02346853f44e1cd764be0023cd3640"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a7b301ff08055d73223058b5c46c55638917f04d21577c95e00e0c4d79201a6b"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-win32.whl", hash = "sha256:d5e51e2901ec2366b79f16c2299a03e74ba4531ddcfacc1416639c557aef0ad8"}, + {file = "ruamel.yaml.clib-0.2.7-cp39-cp39-win_amd64.whl", hash = "sha256:184faeaec61dbaa3cace407cffc5819f7b977e75360e8d5ca19461cd851a5fc5"}, + {file = "ruamel.yaml.clib-0.2.7.tar.gz", hash = "sha256:1f08fd5a2bea9c4180db71678e850b995d2a5f4537be0e94557668cf0f5f9497"}, +] [[package]] -name = "setuptools" -version = "65.5.1" -description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" +name = "semgrep" +version = "1.36.0" +description = "Lightweight static analysis for many languages. Find bug variants with patterns that look like source code." 
optional = false python-versions = ">=3.7" +files = [ + {file = "semgrep-1.36.0-cp37.cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311-none-any.whl", hash = "sha256:36c258cc0b0081cf80780fc407416a85a92d0fe22d60061dd85c808438a91110"}, + {file = "semgrep-1.36.0-cp37.cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311-none-macosx_10_14_x86_64.whl", hash = "sha256:e75f0f078a03357b5d32a11f02ee6a7bc65b0a2937513aa08da88819487f3e54"}, + {file = "semgrep-1.36.0-cp37.cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311-none-macosx_11_0_arm64.whl", hash = "sha256:c7450f3659df4d929380f089fc82120a4ff535ec338ae34498eb75d05dfc31da"}, + {file = "semgrep-1.36.0-cp37.cp38.cp39.cp310.cp311.py37.py38.py39.py310.py311-none-manylinux2014_aarch64.whl", hash = "sha256:7aa2f595e0d3d0e7b2a9a77bfdc7838d2356cdf4aae58e108287a78975682153"}, + {file = "semgrep-1.36.0.tar.gz", hash = "sha256:26102e336a8b86872da3a062bc2f361e9b23add3a6fd24941880a549f8fec851"}, +] + +[package.dependencies] +attrs = ">=21.3" +boltons = ">=21.0,<22.0" +click = ">=8.1,<9.0" +click-option-group = ">=0.5,<1.0" +colorama = ">=0.4.0,<0.5.0" +defusedxml = ">=0.7.1,<0.8.0" +glom = ">=22.1,<23.0" +jsonschema = ">=4.6,<5.0" +packaging = ">=21.0" +peewee = ">=3.14,<4.0" +python-lsp-jsonrpc = ">=1.0.0,<1.1.0" +requests = ">=2.22,<3.0" +rich = ">=12.6.0" +"ruamel.yaml" = ">=0.16.0,<0.18" +tomli = ">=2.0.1,<2.1.0" +typing-extensions = ">=4.2,<5.0" +urllib3 = ">=1.26,<2.0" +wcmatch = ">=8.3,<9.0" [package.extras] -docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", 
"pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +experiments = ["jsonnet (>=0.18,<1.0)"] [[package]] -name = "shapely" -version = "1.8.5.post1" -description = "Geometric objects, predicates, and operations" -category = "main" +name = "setuptools" +version = "68.1.2" +description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" +files = [ + {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"}, + {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"}, +] [package.extras] -all = ["numpy", "pytest", "pytest-cov"] -test = ["pytest", "pytest-cov"] -vectorized = ["numpy"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock 
(>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] [[package]] -name = "text-unidecode" -version = "1.3" -description = "The most basic Text::Unidecode port" -category = "main" +name = "taskipy" +version = "1.12.0" +description = "tasks runner for python projects" optional = false -python-versions = "*" +python-versions = ">=3.6,<4.0" +files = [ + {file = "taskipy-1.12.0-py3-none-any.whl", hash = "sha256:38306fbc952a7ca314b8f842a74b2fc38535cdab21031fe89e714a83e6259a84"}, + {file = "taskipy-1.12.0.tar.gz", hash = "sha256:e3dd7c53f7c9c4fd17dc908b1037f545afc452907eb0953b84e91c0a9a9d809d"}, +] + +[package.dependencies] +colorama = ">=0.4.4,<0.5.0" +mslex = {version = ">=0.3.0,<0.4.0", markers = "sys_platform == \"win32\""} +psutil = ">=5.7.2,<6.0.0" +tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version >= \"3.7\" and python_version < \"4.0\""} [[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] [[package]] name = "tomlkit" -version = 
"0.7.0" +version = "0.11.8" description = "Style preserving TOML library" -category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" +files = [ + {file = "tomlkit-0.11.8-py3-none-any.whl", hash = "sha256:8c726c4c202bdb148667835f68d68780b9a003a9ec34167b6c673b38eff2a171"}, + {file = "tomlkit-0.11.8.tar.gz", hash = "sha256:9330fc7faa1db67b541b28e62018c17d20be733177d290a13b24c62d1614e0c3"}, +] [[package]] name = "tqdm" -version = "4.50.2" +version = "4.66.1" description = "Fast, Extensible Progress Meter" -category = "main" optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*" +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"}, + {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["argopt", "py-make (>=0.1.0)", "pydoc-markdown", "twine"] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] [[package]] name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, +] + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = 
"sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + +[[package]] +name = "ujson" +version = "5.8.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4511560d75b15ecb367eef561554959b9d49b6ec3b8d5634212f9fed74a6df1"}, + {file = "ujson-5.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9399eaa5d1931a0ead49dce3ffacbea63f3177978588b956036bfe53cdf6af75"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4e7bb7eba0e1963f8b768f9c458ecb193e5bf6977090182e2b4f4408f35ac76"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40931d7c08c4ce99adc4b409ddb1bbb01635a950e81239c2382cfe24251b127a"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d53039d39de65360e924b511c7ca1a67b0975c34c015dd468fca492b11caa8f7"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bdf04c6af3852161be9613e458a1fb67327910391de8ffedb8332e60800147a2"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a70f776bda2e5072a086c02792c7863ba5833d565189e09fabbd04c8b4c3abba"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f26629ac531d712f93192c233a74888bc8b8212558bd7d04c349125f10199fcf"}, + {file = "ujson-5.8.0-cp310-cp310-win32.whl", hash = "sha256:7ecc33b107ae88405aebdb8d82c13d6944be2331ebb04399134c03171509371a"}, + {file = "ujson-5.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:3b27a8da7a080add559a3b73ec9ebd52e82cc4419f7c6fb7266e62439a055ed0"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:193349a998cd821483a25f5df30b44e8f495423840ee11b3b28df092ddfd0f7f"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ddeabbc78b2aed531f167d1e70387b151900bc856d61e9325fcdfefb2a51ad8"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ce24909a9c25062e60653073dd6d5e6ec9d6ad7ed6e0069450d5b673c854405"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27a2a3c7620ebe43641e926a1062bc04e92dbe90d3501687957d71b4bdddaec4"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b852bdf920fe9f84e2a2c210cc45f1b64f763b4f7d01468b33f7791698e455e"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:20768961a6a706170497129960762ded9c89fb1c10db2989c56956b162e2a8a3"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e0147d41e9fb5cd174207c4a2895c5e24813204499fd0839951d4c8784a23bf5"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e3673053b036fd161ae7a5a33358ccae6793ee89fd499000204676baafd7b3aa"}, + {file = "ujson-5.8.0-cp311-cp311-win32.whl", hash = "sha256:a89cf3cd8bf33a37600431b7024a7ccf499db25f9f0b332947fbc79043aad879"}, + {file = "ujson-5.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3659deec9ab9eb19e8646932bfe6fe22730757c4addbe9d7d5544e879dc1b721"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:102bf31c56f59538cccdfec45649780ae00657e86247c07edac434cb14d5388c"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:299a312c3e85edee1178cb6453645217ba23b4e3186412677fa48e9a7f986de6"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e385a7679b9088d7bc43a64811a7713cc7c33d032d020f757c54e7d41931ae"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ad24ec130855d4430a682c7a60ca0bc158f8253ec81feed4073801f6b6cb681b"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16fde596d5e45bdf0d7de615346a102510ac8c405098e5595625015b0d4b5296"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6d230d870d1ce03df915e694dcfa3f4e8714369cce2346686dbe0bc8e3f135e7"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9571de0c53db5cbc265945e08f093f093af2c5a11e14772c72d8e37fceeedd08"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7cba16b26efe774c096a5e822e4f27097b7c81ed6fb5264a2b3f5fd8784bab30"}, + {file = "ujson-5.8.0-cp312-cp312-win32.whl", hash = "sha256:48c7d373ff22366eecfa36a52b9b55b0ee5bd44c2b50e16084aa88b9de038916"}, + {file = "ujson-5.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:5ac97b1e182d81cf395ded620528c59f4177eee024b4b39a50cdd7b720fdeec6"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2a64cc32bb4a436e5813b83f5aab0889927e5ea1788bf99b930fad853c5625cb"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e54578fa8838ddc722539a752adfce9372474114f8c127bb316db5392d942f8b"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9721cd112b5e4687cb4ade12a7b8af8b048d4991227ae8066d9c4b3a6642a582"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9707e5aacf63fb919f6237d6490c4e0244c7f8d3dc2a0f84d7dec5db7cb54c"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0be81bae295f65a6896b0c9030b55a106fb2dec69ef877253a87bc7c9c5308f7"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae7f4725c344bf437e9b881019c558416fe84ad9c6b67426416c131ad577df67"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:9ab282d67ef3097105552bf151438b551cc4bedb3f24d80fada830f2e132aeb9"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:94c7bd9880fa33fcf7f6d7f4cc032e2371adee3c5dba2922b918987141d1bf07"}, + {file = "ujson-5.8.0-cp38-cp38-win32.whl", hash = "sha256:bf5737dbcfe0fa0ac8fa599eceafae86b376492c8f1e4b84e3adf765f03fb564"}, + {file = "ujson-5.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:11da6bed916f9bfacf13f4fc6a9594abd62b2bb115acfb17a77b0f03bee4cfd5"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:69b3104a2603bab510497ceabc186ba40fef38ec731c0ccaa662e01ff94a985c"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9249fdefeb021e00b46025e77feed89cd91ffe9b3a49415239103fc1d5d9c29a"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2873d196725a8193f56dde527b322c4bc79ed97cd60f1d087826ac3290cf9207"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a4dafa9010c366589f55afb0fd67084acd8added1a51251008f9ff2c3e44042"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a42baa647a50fa8bed53d4e242be61023bd37b93577f27f90ffe521ac9dc7a3"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f3554eaadffe416c6f543af442066afa6549edbc34fe6a7719818c3e72ebfe95"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fb87decf38cc82bcdea1d7511e73629e651bdec3a43ab40985167ab8449b769c"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:407d60eb942c318482bbfb1e66be093308bb11617d41c613e33b4ce5be789adc"}, + {file = "ujson-5.8.0-cp39-cp39-win32.whl", hash = "sha256:0fe1b7edaf560ca6ab023f81cbeaf9946a240876a993b8c5a21a1c539171d903"}, + {file = "ujson-5.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f9b63530a5392eb687baff3989d0fb5f45194ae5b1ca8276282fb647f8dcdb3"}, + {file = 
"ujson-5.8.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:efeddf950fb15a832376c0c01d8d7713479fbeceaed1eaecb2665aa62c305aec"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d8283ac5d03e65f488530c43d6610134309085b71db4f675e9cf5dff96a8282"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb0142f6f10f57598655340a3b2c70ed4646cbe674191da195eb0985a9813b83"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d459aca895eb17eb463b00441986b021b9312c6c8cc1d06880925c7f51009c"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d524a8c15cfc863705991d70bbec998456a42c405c291d0f84a74ad7f35c5109"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d6f84a7a175c75beecde53a624881ff618e9433045a69fcfb5e154b73cdaa377"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b748797131ac7b29826d1524db1cc366d2722ab7afacc2ce1287cdafccddbf1f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e72ba76313d48a1a3a42e7dc9d1db32ea93fac782ad8dde6f8b13e35c229130"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f504117a39cb98abba4153bf0b46b4954cc5d62f6351a14660201500ba31fe7f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8c91b6f4bf23f274af9002b128d133b735141e867109487d17e344d38b87d94"}, + {file = "ujson-5.8.0.tar.gz", hash = "sha256:78e318def4ade898a461b3d92a79f9441e7e0e4d2ad5419abed4336d702c7425"}, +] + +[[package]] +name = "uritemplate" version = "4.1.1" -description = "Backported and Experimental Type Hints for Python 3.6+" -category = "main" +description = "Implementation of RFC 6570 URI Templates" optional = 
false python-versions = ">=3.6" +files = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] [[package]] name = "urllib3" -version = "1.26.9" +version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, + {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, +] [package.extras] brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "virtualenv" +version = "20.24.3" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.24.3-py3-none-any.whl", hash = "sha256:95a6e9398b4967fbcb5fef2acec5efaf9aa4972049d9ae41f95e0972a683fd02"}, + {file = "virtualenv-20.24.3.tar.gz", hash = "sha256:e5c3b4ce817b0b328af041506a2a299418c98747c4b1e68cb7527e74ced23efc"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<4" + +[package.extras] +docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = 
["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "wcmatch" +version = "8.4.1" +description = "Wildcard/glob file name matcher." +optional = false +python-versions = ">=3.7" +files = [ + {file = "wcmatch-8.4.1-py3-none-any.whl", hash = "sha256:3476cd107aba7b25ba1d59406938a47dc7eec6cfd0ad09ff77193f21a964dee7"}, + {file = "wcmatch-8.4.1.tar.gz", hash = "sha256:b1f042a899ea4c458b7321da1b5e3331e3e0ec781583434de1301946ceadb943"}, +] + +[package.dependencies] +bracex = ">=2.1.1" + [[package]] name = "win32-setctime" version = "1.1.0" description = "A small Python utility to set file creation time on Windows" -category = "main" optional = false python-versions = ">=3.5" +files = [ + {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, + {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, +] [package.extras] dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ee4afac41415d52d53a9833ebae7e32b344be72835bbb589018c9e938045a560"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bf345c3a4f5ba7f766430f97f9cc1320786f19584acc7086491f45524a551ac"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a96c19c52ff442a808c105901d0bdfd2e28575b3d5f82e2f5fd67e20dc5f4ea"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:891c0e3ec5ec881541f6c5113d8df0315ce5440e244a716b95f2525b7b9f3608"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3a53ba34a636a256d767c086ceb111358876e1fb6b50dfc4d3f4951d40133d5"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:566185e8ebc0898b11f8026447eacd02e46226716229cea8db37496c8cdd26e0"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2b0738fb871812722a0ac2154be1f049c6223b9f6f22eec352996b69775b36d4"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:32f1d071b3f362c80f1a7d322bfd7b2d11e33d2adf395cc1dd4df36c9c243095"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e9fdc7ac0d42bc3ea78818557fab03af6181e076a2944f43c38684b4b6bed8e3"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56ff08ab5df8429901ebdc5d15941b59f6253393cb5da07b4170beefcf1b2528"}, + {file = "yarl-1.9.2-cp310-cp310-win32.whl", hash = "sha256:8ea48e0a2f931064469bdabca50c2f578b565fc446f302a79ba6cc0ee7f384d3"}, + {file = "yarl-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:50f33040f3836e912ed16d212f6cc1efb3231a8a60526a407aeb66c1c1956dde"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:646d663eb2232d7909e6601f1a9107e66f9791f290a1b3dc7057818fe44fc2b6"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:aff634b15beff8902d1f918012fc2a42e0dbae6f469fce134c8a0dc51ca423bb"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83503934c6273806aed765035716216cc9ab4e0364f7f066227e1aaea90b8d0"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b25322201585c69abc7b0e89e72790469f7dad90d26754717f3310bfe30331c2"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22a94666751778629f1ec4280b08eb11815783c63f52092a5953faf73be24191"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ec53a0ea2a80c5cd1ab397925f94bff59222aa3cf9c6da938ce05c9ec20428d"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832b7e711027c114d79dffb92576acd1bd2decc467dec60e1cac96912602d0e6"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:95d2ecefbcf4e744ea952d073c6922e72ee650ffc79028eb1e320e732898d7e8"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d4e2c6d555e77b37288eaf45b8f60f0737c9efa3452c6c44626a5455aeb250b9"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:783185c75c12a017cc345015ea359cc801c3b29a2966c2655cd12b233bf5a2be"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b8cc1863402472f16c600e3e93d542b7e7542a540f95c30afd472e8e549fc3f7"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:822b30a0f22e588b32d3120f6d41e4ed021806418b4c9f0bc3048b8c8cb3f92a"}, + {file = "yarl-1.9.2-cp311-cp311-win32.whl", hash = "sha256:a60347f234c2212a9f0361955007fcf4033a75bf600a33c88a0a8e91af77c0e8"}, + {file = "yarl-1.9.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:be6b3fdec5c62f2a67cb3f8c6dbf56bbf3f61c0f046f84645cd1ca73532ea051"}, + {file = "yarl-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38a3928ae37558bc1b559f67410df446d1fbfa87318b124bf5032c31e3447b74"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac9bb4c5ce3975aeac288cfcb5061ce60e0d14d92209e780c93954076c7c4367"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3da8a678ca8b96c8606bbb8bfacd99a12ad5dd288bc6f7979baddd62f71c63ef"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13414591ff516e04fcdee8dc051c13fd3db13b673c7a4cb1350e6b2ad9639ad3"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf74d08542c3a9ea97bb8f343d4fcbd4d8f91bba5ec9d5d7f792dbe727f88938"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e7221580dc1db478464cfeef9b03b95c5852cc22894e418562997df0d074ccc"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:494053246b119b041960ddcd20fd76224149cfea8ed8777b687358727911dd33"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:52a25809fcbecfc63ac9ba0c0fb586f90837f5425edfd1ec9f3372b119585e45"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e65610c5792870d45d7b68c677681376fcf9cc1c289f23e8e8b39c1485384185"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1b1bba902cba32cdec51fca038fd53f8beee88b77efc373968d1ed021024cc04"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:662e6016409828ee910f5d9602a2729a8a57d74b163c89a837de3fea050c7582"}, + {file = "yarl-1.9.2-cp37-cp37m-win32.whl", hash = "sha256:f364d3480bffd3aa566e886587eaca7c8c04d74f6e8933f3f2c996b7f09bee1b"}, + {file = "yarl-1.9.2-cp37-cp37m-win_amd64.whl", hash = 
"sha256:6a5883464143ab3ae9ba68daae8e7c5c95b969462bbe42e2464d60e7e2698368"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5610f80cf43b6202e2c33ba3ec2ee0a2884f8f423c8f4f62906731d876ef4fac"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9a4e67ad7b646cd6f0938c7ebfd60e481b7410f574c560e455e938d2da8e0f4"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83fcc480d7549ccebe9415d96d9263e2d4226798c37ebd18c930fce43dfb9574"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fcd436ea16fee7d4207c045b1e340020e58a2597301cfbcfdbe5abd2356c2fb"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84e0b1599334b1e1478db01b756e55937d4614f8654311eb26012091be109d59"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3458a24e4ea3fd8930e934c129b676c27452e4ebda80fbe47b56d8c6c7a63a9e"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:838162460b3a08987546e881a2bfa573960bb559dfa739e7800ceeec92e64417"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e2d08f07a3d7d3e12549052eb5ad3eab1c349c53ac51c209a0e5991bbada78"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de119f56f3c5f0e2fb4dee508531a32b069a5f2c6e827b272d1e0ff5ac040333"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:149ddea5abf329752ea5051b61bd6c1d979e13fbf122d3a1f9f0c8be6cb6f63c"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:674ca19cbee4a82c9f54e0d1eee28116e63bc6fd1e96c43031d11cbab8b2afd5"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:9b3152f2f5677b997ae6c804b73da05a39daa6a9e85a512e0e6823d81cdad7cc"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:5415d5a4b080dc9612b1b63cba008db84e908b95848369aa1da3686ae27b6d2b"}, + {file = "yarl-1.9.2-cp38-cp38-win32.whl", hash = "sha256:f7a3d8146575e08c29ed1cd287068e6d02f1c7bdff8970db96683b9591b86ee7"}, + {file = "yarl-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:63c48f6cef34e6319a74c727376e95626f84ea091f92c0250a98e53e62c77c72"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75df5ef94c3fdc393c6b19d80e6ef1ecc9ae2f4263c09cacb178d871c02a5ba9"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c027a6e96ef77d401d8d5a5c8d6bc478e8042f1e448272e8d9752cb0aff8b5c8"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3b078dbe227f79be488ffcfc7a9edb3409d018e0952cf13f15fd6512847f3f7"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59723a029760079b7d991a401386390c4be5bfec1e7dd83e25a6a0881859e716"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b03917871bf859a81ccb180c9a2e6c1e04d2f6a51d953e6a5cdd70c93d4e5a2a"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1012fa63eb6c032f3ce5d2171c267992ae0c00b9e164efe4d73db818465fac3"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74dcbfe780e62f4b5a062714576f16c2f3493a0394e555ab141bf0d746bb955"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c56986609b057b4839968ba901944af91b8e92f1725d1a2d77cbac6972b9ed1"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c315df3293cd521033533d242d15eab26583360b58f7ee5d9565f15fee1bef4"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b7232f8dfbd225d57340e441d8caf8652a6acd06b389ea2d3222b8bc89cbfca6"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:53338749febd28935d55b41bf0bcc79d634881195a39f6b2f767870b72514caf"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8288d7cd28f8119b07dd49b7230d6b4562f9b61ee9a4ab02221060d21136be80"}, + {file = "yarl-1.9.2-cp39-cp39-win32.whl", hash = "sha256:b124e2a6d223b65ba8768d5706d103280914d61f5cae3afbc50fc3dfcc016623"}, + {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, + {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + [[package]] name = "zipp" -version = "3.7.0" +version = "3.16.2" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" +files = [ + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, +] [package.extras] -docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] -testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[extras] +avro = ["pandavro"] +cli = ["click"] +upload 
= ["gql", "requests-toolbelt"] [metadata] -lock-version = "1.1" -python-versions = ">=3.7.1,<3.11" -content-hash = "15603cc8338dc961eb1f7b28d0c27f26a686f9303d9b5a6d4252cc1f9914c05a" - -[metadata.files] -cachetools = [ - {file = "cachetools-4.2.4-py3-none-any.whl", hash = "sha256:92971d3cb7d2a97efff7c7bb1657f21a8f5fb309a37530537c71b1774189f2d1"}, - {file = "cachetools-4.2.4.tar.gz", hash = "sha256:89ea6f1b638d5a73a4f9226be57ac5e4f399d22770b92355f92dcb0f7f001693"}, -] -certifi = [ - {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, - {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, -] -charset-normalizer = [ - {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, - {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, -] -ckanapi = [ - {file = "ckanapi-4.6.tar.gz", hash = "sha256:35361965bfb38c8e146d7229f2d7c3aaf1c0f2ef547de4239b4d38931bf081d2"}, -] -click = [ - {file = "click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3"}, - {file = "click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b"}, -] -colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, -] -db-dtypes = [ - {file = "db-dtypes-1.0.0.tar.gz", hash = "sha256:3070d1a8d86ff0b5d9b16f15c5fab9c18893c6b3d5723cd95ee397b169049454"}, - {file = "db_dtypes-1.0.0-py2.py3-none-any.whl", hash = "sha256:66f6c1b87161814292a2856d1acc17fd4af1b7055853dc7d11af33dc5b94f64e"}, -] -docopt = [ - {file = 
"docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, -] -fastavro = [ - {file = "fastavro-1.4.10-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:f225c81e869b3cefef6f2b478cd452693181ed7e645be3cea4d82024354ecaa0"}, - {file = "fastavro-1.4.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7669302c9124b7cd6c1bdff80c77116b2290c984814077fb7d01d7389586054"}, - {file = "fastavro-1.4.10-cp310-cp310-win_amd64.whl", hash = "sha256:995525bdfbdfef205ea148b5bc6a9fe5ccf921931123c39d9aad75a2b661681e"}, - {file = "fastavro-1.4.10-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:64244c53f1e4853184c2f7383d0332e1dcb34c38c05e6613530ade0378e8acfc"}, - {file = "fastavro-1.4.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c60965da6dc7a91e00ccd84d84797fad746555f44e8a816c4cc460fb231c44fe"}, - {file = "fastavro-1.4.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10eb25378f37dc00c10e83c4c0442c1a6d1b871f74a6dfdfc12b6447962bbdd0"}, - {file = "fastavro-1.4.10-cp37-cp37m-win_amd64.whl", hash = "sha256:d5719adf6045fc743de5fa738d561a81e58dc782c94f1b16cb21b5dd6253e7fd"}, - {file = "fastavro-1.4.10-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:24c4a1a8cc92e135ecfcd9cbd1f6cfa088cbc74d78c18e02a609cb11fa33778d"}, - {file = "fastavro-1.4.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0934490b0c3bcfd6bcbacbcb5144c6b5e4298cda209fbb17c856adf5405127dd"}, - {file = "fastavro-1.4.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a23763d73412c077aee401a0368c64cbc23859e26711dbae78a3cf0227f65165"}, - {file = "fastavro-1.4.10-cp38-cp38-win_amd64.whl", hash = "sha256:09f1dfdd8192ae09e0f477d1f024d8054fccdb099ad495d2a796bcee3cadebd1"}, - {file = "fastavro-1.4.10-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:0c6695753fa3035bbd0fa5cb21bf1b5dad39483c669b32ca0bb55fb07c1ccc87"}, - {file = 
"fastavro-1.4.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35f960dbba04647d8d6d5616f879622d2a1e8a84eb2d2e02a883a22e0803463a"}, - {file = "fastavro-1.4.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9660878ca01e3dbbee12385c5902a2b6b12ecbb5af4733d1026175a14b1ef67f"}, - {file = "fastavro-1.4.10-cp39-cp39-win_amd64.whl", hash = "sha256:64cbd386e408b3bcb2de53b1f847163073eb0d0d0338db65f76051c6ba9a9bc1"}, - {file = "fastavro-1.4.10.tar.gz", hash = "sha256:a24f9dd803c44bfb599476b000f9bd0088f7ac2401e6c20818f38d8af12785a0"}, -] -google-api-core = [ - {file = "google-api-core-1.31.5.tar.gz", hash = "sha256:85d2074f2c8f9c07e614d7f978767d71ceb7d40647814ef4236d3a0ef671ee75"}, - {file = "google_api_core-1.31.5-py2.py3-none-any.whl", hash = "sha256:6815207a8b422e9da42c200681603f304b25f98c98b675a9db9fdc3717e44280"}, -] -google-auth = [ - {file = "google-auth-1.35.0.tar.gz", hash = "sha256:b7033be9028c188ee30200b204ea00ed82ea1162e8ac1df4aa6ded19a191d88e"}, - {file = "google_auth-1.35.0-py2.py3-none-any.whl", hash = "sha256:997516b42ecb5b63e8d80f5632c1a61dddf41d2a4c2748057837e06e00014258"}, -] -google-auth-oauthlib = [ - {file = "google-auth-oauthlib-0.5.1.tar.gz", hash = "sha256:30596b824fc6808fdaca2f048e4998cc40fb4b3599eaea66d28dc7085b36c5b8"}, - {file = "google_auth_oauthlib-0.5.1-py2.py3-none-any.whl", hash = "sha256:24f67735513c4c7134dbde2f1dee5a1deb6acc8dfcb577d7bff30d213a28e7b0"}, -] -google-cloud-bigquery = [ - {file = "google-cloud-bigquery-2.30.1.tar.gz", hash = "sha256:4e3b5e3dcc475d5a601d84872ac0b63e059540be2251b1c4165c51106d572855"}, - {file = "google_cloud_bigquery-2.30.1-py2.py3-none-any.whl", hash = "sha256:c62d601aa0f62388e1909d11de40db7597b02fb8602ccb7f21a3ac2a0997495b"}, -] -google-cloud-bigquery-storage = [ - {file = "google-cloud-bigquery-storage-1.1.0.tar.gz", hash = "sha256:c92533cedbb672f1a35555c112d4d5cccb9f8f6d0e98a604fbf98223773adad3"}, - {file = 
"google_cloud_bigquery_storage-1.1.0-py2.py3-none-any.whl", hash = "sha256:fc543e9d2343d34c043ad48984333ba84de10be31b7af8435548aaf8555507c4"}, -] -google-cloud-core = [ - {file = "google-cloud-core-2.2.3.tar.gz", hash = "sha256:89d2f7189bc6dc74de128d423ea52cc8719f0a5dbccd9ca80433f6504a20255c"}, - {file = "google_cloud_core-2.2.3-py2.py3-none-any.whl", hash = "sha256:a423852f4c36622376c8f0be509b67533690e061062368b763b92694c4ee06a7"}, -] -google-cloud-storage = [ - {file = "google-cloud-storage-1.42.3.tar.gz", hash = "sha256:7754d4dcaa45975514b404ece0da2bb4292acbc67ca559a69e12a19d54fcdb06"}, - {file = "google_cloud_storage-1.42.3-py2.py3-none-any.whl", hash = "sha256:71ee3a0dcf2c139f034a054181cd7658f1ec8f12837d2769c450a8a00fcd4c6d"}, -] -google-crc32c = [ - {file = "google-crc32c-1.3.0.tar.gz", hash = "sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df"}, - {file = "google_crc32c-1.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d"}, - {file = "google_crc32c-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168"}, - {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e"}, - {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59"}, - {file = "google_crc32c-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd"}, - {file = "google_crc32c-1.3.0-cp310-cp310-win32.whl", hash = "sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea"}, - {file = "google_crc32c-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f"}, - 
{file = "google_crc32c-1.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-win32.whl", hash = "sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a"}, - {file = "google_crc32c-1.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c"}, - {file = 
"google_crc32c-1.3.0-cp37-cp37m-win32.whl", hash = "sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e"}, - {file = "google_crc32c-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08"}, - {file = "google_crc32c-1.3.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812"}, - {file = "google_crc32c-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b"}, - {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0"}, - {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328"}, - {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2"}, - {file = "google_crc32c-1.3.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206"}, - {file = "google_crc32c-1.3.0-cp38-cp38-win32.whl", hash = "sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829"}, - {file = "google_crc32c-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713"}, - {file = "google_crc32c-1.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b"}, - {file = "google_crc32c-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a"}, - {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267"}, - {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217"}, - {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73"}, - {file = "google_crc32c-1.3.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3"}, - {file = "google_crc32c-1.3.0-cp39-cp39-win32.whl", hash = "sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f"}, - {file = "google_crc32c-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422"}, - {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125"}, - {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e"}, - {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3"}, - {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942"}, - {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183"}, -] -google-resumable-media = [ - {file = "google-resumable-media-2.3.2.tar.gz", hash = "sha256:06924e8b1e79f158f0202e7dd151ad75b0ea9d59b997c850f56bdd4a5a361513"}, - {file = "google_resumable_media-2.3.2-py2.py3-none-any.whl", hash = 
"sha256:3c13f84813861ac8f5b6371254bdd437076bf1f3bac527a9f3fd123a70166f52"}, -] -googleapis-common-protos = [ - {file = "googleapis-common-protos-1.55.0.tar.gz", hash = "sha256:53eb313064738f45d5ac634155ae208e121c963659627b90dfcb61ef514c03e1"}, - {file = "googleapis_common_protos-1.55.0-py2.py3-none-any.whl", hash = "sha256:183bb0356bd614c4330ad5158bc1c1bcf9bcf7f5e7f911317559fe209496eeee"}, -] -grpcio = [ - {file = "grpcio-1.44.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:11f811c0fffd84fca747fbc742464575e5eb130fd4fb4d6012ccc34febd001db"}, - {file = "grpcio-1.44.0-cp310-cp310-macosx_10_10_universal2.whl", hash = "sha256:9a86a91201f8345502ea81dee0a55ae13add5fafadf109b17acd858fe8239651"}, - {file = "grpcio-1.44.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:5f3c54ebb5d9633a557335c01d88d3d4928e9b1b131692283b6184da1edbec0b"}, - {file = "grpcio-1.44.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d47553b8e86ab1e59b0185ba6491a187f94a0239f414c8fc867a22b0405b798"}, - {file = "grpcio-1.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1e22d3a510438b7f3365c0071b810672d09febac6e8ca8a47eab657ae5f347b"}, - {file = "grpcio-1.44.0-cp310-cp310-win32.whl", hash = "sha256:41036a574cab3468f24d41d6ed2b52588fb85ed60f8feaa925d7e424a250740b"}, - {file = "grpcio-1.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:4ee51964edfd0a1293a95bb0d72d134ecf889379d90d2612cbf663623ce832b4"}, - {file = "grpcio-1.44.0-cp36-cp36m-linux_armv7l.whl", hash = "sha256:e2149077d71e060678130644670389ddf1491200bcea16c5560d4ccdc65e3f2e"}, - {file = "grpcio-1.44.0-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:0ac72d4b953b76924f8fa21436af060d7e6d8581e279863f30ee14f20751ac27"}, - {file = "grpcio-1.44.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:5c30a9a7d3a05920368a60b080cbbeaf06335303be23ac244034c71c03a0fd24"}, - {file = "grpcio-1.44.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = 
"sha256:05467acd391e3fffb05991c76cb2ed2fa1309d0e3815ac379764bc5670b4b5d4"}, - {file = "grpcio-1.44.0-cp36-cp36m-manylinux_2_17_aarch64.whl", hash = "sha256:b81dc7894062ed2d25b74a2725aaa0a6895ce97ce854f432fe4e87cad5a07316"}, - {file = "grpcio-1.44.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:46d4843192e7d36278884282e100b8f305cf37d1b3d8c6b4f736d4454640a069"}, - {file = "grpcio-1.44.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:898c159148f27e23c08a337fb80d31ece6b76bb24f359d83929460d813665b74"}, - {file = "grpcio-1.44.0-cp36-cp36m-win32.whl", hash = "sha256:b8d852329336c584c636caa9c2db990f3a332b19bc86a80f4646b58d27c142db"}, - {file = "grpcio-1.44.0-cp36-cp36m-win_amd64.whl", hash = "sha256:790d7493337558ae168477d1be3178f4c9b8f91d8cd9b8b719d06fd9b2d48836"}, - {file = "grpcio-1.44.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:cd61b52d9cf8fcf8d9628c0b640b9e44fdc5e93d989cc268086a858540ed370c"}, - {file = "grpcio-1.44.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:14eefcf623890f3f7dd7831decd2a2116652b5ce1e0f1d4b464b8f52110743b0"}, - {file = "grpcio-1.44.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:bebe90b8020b4248e5a2076b56154cc6ff45691bbbe980579fc9db26717ac968"}, - {file = "grpcio-1.44.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:89b390b1c0de909965280d175c53128ce2f0f4f5c0f011382243dd7f2f894060"}, - {file = "grpcio-1.44.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:c122dac5cb299b8ad7308d61bd9fe0413de13b0347cce465398436b3fdf1f609"}, - {file = "grpcio-1.44.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6641a28cc826a92ef717201cca9a035c34a0185e38b0c93f3ce5f01a01a1570a"}, - {file = "grpcio-1.44.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb0a3e0e64843441793923d9532a3a23907b07b2a1e0a7a31f186dc185bb772"}, - {file = "grpcio-1.44.0-cp37-cp37m-win32.whl", hash = 
"sha256:be857b7ec2ac43455156e6ba89262f7d7ae60227049427d01a3fecd218a3f88d"}, - {file = "grpcio-1.44.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f6a9cf0e77f72f2ac30c9c6e086bc7446c984c51bebc6c7f50fbcd718037edba"}, - {file = "grpcio-1.44.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:19e54f0c7083c8332b5a75a9081fc5127f1dbb67b6c1a32bd7fe896ef0934918"}, - {file = "grpcio-1.44.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:bfd36b959c3c4e945119387baed1414ea46f7116886aa23de0172302b49d7ff1"}, - {file = "grpcio-1.44.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:ccd388b8f37b19d06e4152189726ce309e36dc03b53f2216a4ea49f09a7438e6"}, - {file = "grpcio-1.44.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:9075c0c003c1ff14ebce8f0ba55cc692158cb55c68da09cf8b0f9fc5b749e343"}, - {file = "grpcio-1.44.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:e898194f76212facbaeb6d7545debff29351afa23b53ff8f0834d66611af5139"}, - {file = "grpcio-1.44.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8fa6584046a7cf281649975a363673fa5d9c6faf9dc923f261cc0e56713b5892"}, - {file = "grpcio-1.44.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36a7bdd6ef9bca050c7ade8cba5f0e743343ea0756d5d3d520e915098a9dc503"}, - {file = "grpcio-1.44.0-cp38-cp38-win32.whl", hash = "sha256:dc3290d0411ddd2bd49adba5793223de8de8b01588d45e9376f1a9f7d25414f4"}, - {file = "grpcio-1.44.0-cp38-cp38-win_amd64.whl", hash = "sha256:13343e7b840c20f43b44f0e6d3bbdc037c964f0aec9735d7cb685c407731c9ff"}, - {file = "grpcio-1.44.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c5c2f8417d13386e18ccc8c61467cb6a6f9667a1ff7000a2d7d378e5d7df693f"}, - {file = "grpcio-1.44.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:cf220199b7b4992729ad4d55d5d3f652f4ccfe1a35b5eacdbecf189c245e1859"}, - {file = "grpcio-1.44.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4201c597e5057a9bfef9ea5777a6d83f6252cb78044db7d57d941ec2300734a5"}, - {file = 
"grpcio-1.44.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:e2de61005118ae59d48d5d749283ebfd1ba4ca68cc1000f8a395cd2bdcff7ceb"}, - {file = "grpcio-1.44.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:871078218fa9117e2a378678f327e32fda04e363ed6bc0477275444273255d4d"}, - {file = "grpcio-1.44.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a8d610b7b557a7609fecee80b6dd793ecb7a9a3c3497fbdce63ce7d151cdd705"}, - {file = "grpcio-1.44.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fcb53e4eb8c271032c91b8981df5fc1bb974bc73e306ec2c27da41bd95c44b5"}, - {file = "grpcio-1.44.0-cp39-cp39-win32.whl", hash = "sha256:e50ddea6de76c09b656df4b5a55ae222e2a56e625c44250e501ff3c904113ec1"}, - {file = "grpcio-1.44.0-cp39-cp39-win_amd64.whl", hash = "sha256:d2ec124a986093e26420a5fb10fa3f02b2c232f924cdd7b844ddf7e846c020cd"}, - {file = "grpcio-1.44.0.tar.gz", hash = "sha256:4bae1c99896045d3062ab95478411c8d5a52cb84b91a1517312629fa6cfeb50e"}, -] -idna = [ - {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, - {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, -] -importlib-metadata = [ - {file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"}, - {file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"}, -] -jinja2 = [ - {file = "Jinja2-3.0.3-py3-none-any.whl", hash = "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8"}, - {file = "Jinja2-3.0.3.tar.gz", hash = "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"}, -] -loguru = [ - {file = "loguru-0.6.0-py3-none-any.whl", hash = "sha256:4e2414d534a2ab57573365b3e6d0234dfb1d84b68b7f3b948e6fb743860a77c3"}, - {file = "loguru-0.6.0.tar.gz", hash = 
"sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"}, -] -markupsafe = [ - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"}, - {file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"}, - {file = 
"MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"}, - {file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"}, - {file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = 
"sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"}, - {file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"}, - {file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"}, -] -numpy = [ - {file = "numpy-1.21.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:301e408a052fdcda5cdcf03021ebafc3c6ea093021bf9d1aa47c54d48bdad166"}, - {file = "numpy-1.21.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a7e8f6216f180f3fd4efb73de5d1eaefb5f5a1ee5b645c67333033e39440e63a"}, - {file = "numpy-1.21.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fc7a7d7b0ed72589fd8b8486b9b42a564f10b8762be8bd4d9df94b807af4a089"}, - {file = "numpy-1.21.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58ca1d7c8aef6e996112d0ce873ac9dfa1eaf4a1196b4ff7ff73880a09923ba7"}, - {file = "numpy-1.21.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc4b2fb01f1b4ddbe2453468ea0719f4dbb1f5caa712c8b21bb3dd1480cd30d9"}, - {file = "numpy-1.21.5-cp310-cp310-win_amd64.whl", hash = "sha256:cc1b30205d138d1005adb52087ff45708febbef0e420386f58664f984ef56954"}, - {file = "numpy-1.21.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:08de8472d9f7571f9d51b27b75e827f5296295fa78817032e84464be8bb905bc"}, - {file = "numpy-1.21.5-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4fe6a006557b87b352c04596a6e3f12a57d6e5f401d804947bd3188e6b0e0e76"}, - {file = "numpy-1.21.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3d893b0871322eaa2f8c7072cdb552d8e2b27645b7875a70833c31e9274d4611"}, - {file = "numpy-1.21.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:341dddcfe3b7b6427a28a27baa59af5ad51baa59bfec3264f1ab287aa3b30b13"}, - {file = "numpy-1.21.5-cp37-cp37m-win32.whl", hash = 
"sha256:ca9c23848292c6fe0a19d212790e62f398fd9609aaa838859be8459bfbe558aa"}, - {file = "numpy-1.21.5-cp37-cp37m-win_amd64.whl", hash = "sha256:025b497014bc33fc23897859350f284323f32a2fff7654697f5a5fc2a19e9939"}, - {file = "numpy-1.21.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a5098df115340fb17fc93867317a947e1dcd978c3888c5ddb118366095851f8"}, - {file = "numpy-1.21.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:311283acf880cfcc20369201bd75da907909afc4666966c7895cbed6f9d2c640"}, - {file = "numpy-1.21.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b545ebadaa2b878c8630e5bcdb97fc4096e779f335fc0f943547c1c91540c815"}, - {file = "numpy-1.21.5-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:c5562bcc1a9b61960fc8950ade44d00e3de28f891af0acc96307c73613d18f6e"}, - {file = "numpy-1.21.5-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eed2afaa97ec33b4411995be12f8bdb95c87984eaa28d76cf628970c8a2d689a"}, - {file = "numpy-1.21.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61bada43d494515d5b122f4532af226fdb5ee08fe5b5918b111279843dc6836a"}, - {file = "numpy-1.21.5-cp38-cp38-win32.whl", hash = "sha256:7b9d6b14fc9a4864b08d1ba57d732b248f0e482c7b2ff55c313137e3ed4d8449"}, - {file = "numpy-1.21.5-cp38-cp38-win_amd64.whl", hash = "sha256:dbce7adeb66b895c6aaa1fad796aaefc299ced597f6fbd9ceddb0dd735245354"}, - {file = "numpy-1.21.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:507c05c7a37b3683eb08a3ff993bd1ee1e6c752f77c2f275260533b265ecdb6c"}, - {file = "numpy-1.21.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:00c9fa73a6989895b8815d98300a20ac993c49ac36c8277e8ffeaa3631c0dbbb"}, - {file = "numpy-1.21.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69a5a8d71c308d7ef33ef72371c2388a90e3495dbb7993430e674006f94797d5"}, - {file = "numpy-1.21.5-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2d8adfca843bc46ac199a4645233f13abf2011a0b2f4affc5c37cd552626f27b"}, - {file = 
"numpy-1.21.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c293d3c0321996cd8ffe84215ffe5d269fd9d1d12c6f4ffe2b597a7c30d3e593"}, - {file = "numpy-1.21.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c978544be9e04ed12016dd295a74283773149b48f507d69b36f91aa90a643e5"}, - {file = "numpy-1.21.5-cp39-cp39-win32.whl", hash = "sha256:2a9add27d7fc0fdb572abc3b2486eb3b1395da71e0254c5552b2aad2a18b5441"}, - {file = "numpy-1.21.5-cp39-cp39-win_amd64.whl", hash = "sha256:1964db2d4a00348b7a60ee9d013c8cb0c566644a589eaa80995126eac3b99ced"}, - {file = "numpy-1.21.5-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a7c4b701ca418cd39e28ec3b496e6388fe06de83f5f0cb74794fa31cfa384c02"}, - {file = "numpy-1.21.5.zip", hash = "sha256:6a5928bc6241264dce5ed509e66f33676fc97f464e7a919edc672fb5532221ee"}, -] -oauthlib = [ - {file = "oauthlib-3.2.0-py3-none-any.whl", hash = "sha256:6db33440354787f9b7f3a6dbd4febf5d0f93758354060e802f6c06cb493022fe"}, - {file = "oauthlib-3.2.0.tar.gz", hash = "sha256:23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2"}, -] -packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, -] -pandas = [ - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, - {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, - {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, - {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, - {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, - {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, - 
{file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, - {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, - {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, - {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, - {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, - {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, -] -pandas-gbq = [ - {file = "pandas-gbq-0.17.4.tar.gz", hash = "sha256:70ac57cc6ebf9d1e1c1c810f5ccac710163acd4c3d13e8badea27bb66fae19f7"}, - {file = "pandas_gbq-0.17.4-py2.py3-none-any.whl", hash = "sha256:3b3714167bdc4b1a6013ff6286a452727efbceb412922a2ca39aa996e8e8b129"}, -] -pandavro = [ - {file = "pandavro-1.6.0.tar.gz", hash = "sha256:d098da34529fbb20de5fd1a6f231918d1b60941b25bea5dc87897ef0d472cb6f"}, -] -proto-plus = [ - {file = "proto-plus-1.20.3.tar.gz", hash = "sha256:f28b225bc9e6c14e206fb7f8e996a46fb2ccd902648e512d496abb6a716a4ae5"}, - {file = "proto_plus-1.20.3-py3-none-any.whl", hash = "sha256:b06be21c3848fbc20387d1d6891a9b97dfa1cdd0f10d3d42ef70b5700ec0f423"}, -] 
-protobuf = [ - {file = "protobuf-3.19.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f51d5a9f137f7a2cec2d326a74b6e3fc79d635d69ffe1b036d39fc7d75430d37"}, - {file = "protobuf-3.19.4-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:09297b7972da685ce269ec52af761743714996b4381c085205914c41fcab59fb"}, - {file = "protobuf-3.19.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:072fbc78d705d3edc7ccac58a62c4c8e0cec856987da7df8aca86e647be4e35c"}, - {file = "protobuf-3.19.4-cp310-cp310-win32.whl", hash = "sha256:7bb03bc2873a2842e5ebb4801f5c7ff1bfbdf426f85d0172f7644fcda0671ae0"}, - {file = "protobuf-3.19.4-cp310-cp310-win_amd64.whl", hash = "sha256:f358aa33e03b7a84e0d91270a4d4d8f5df6921abe99a377828839e8ed0c04e07"}, - {file = "protobuf-3.19.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:1c91ef4110fdd2c590effb5dca8fdbdcb3bf563eece99287019c4204f53d81a4"}, - {file = "protobuf-3.19.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c438268eebb8cf039552897d78f402d734a404f1360592fef55297285f7f953f"}, - {file = "protobuf-3.19.4-cp36-cp36m-win32.whl", hash = "sha256:835a9c949dc193953c319603b2961c5c8f4327957fe23d914ca80d982665e8ee"}, - {file = "protobuf-3.19.4-cp36-cp36m-win_amd64.whl", hash = "sha256:4276cdec4447bd5015453e41bdc0c0c1234eda08420b7c9a18b8d647add51e4b"}, - {file = "protobuf-3.19.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6cbc312be5e71869d9d5ea25147cdf652a6781cf4d906497ca7690b7b9b5df13"}, - {file = "protobuf-3.19.4-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:54a1473077f3b616779ce31f477351a45b4fef8c9fd7892d6d87e287a38df368"}, - {file = "protobuf-3.19.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:435bb78b37fc386f9275a7035fe4fb1364484e38980d0dd91bc834a02c5ec909"}, - {file = "protobuf-3.19.4-cp37-cp37m-win32.whl", hash = "sha256:16f519de1313f1b7139ad70772e7db515b1420d208cb16c6d7858ea989fc64a9"}, - {file = "protobuf-3.19.4-cp37-cp37m-win_amd64.whl", 
hash = "sha256:cdc076c03381f5c1d9bb1abdcc5503d9ca8b53cf0a9d31a9f6754ec9e6c8af0f"}, - {file = "protobuf-3.19.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:69da7d39e39942bd52848438462674c463e23963a1fdaa84d88df7fbd7e749b2"}, - {file = "protobuf-3.19.4-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:48ed3877fa43e22bcacc852ca76d4775741f9709dd9575881a373bd3e85e54b2"}, - {file = "protobuf-3.19.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd95d1dfb9c4f4563e6093a9aa19d9c186bf98fa54da5252531cc0d3a07977e7"}, - {file = "protobuf-3.19.4-cp38-cp38-win32.whl", hash = "sha256:b38057450a0c566cbd04890a40edf916db890f2818e8682221611d78dc32ae26"}, - {file = "protobuf-3.19.4-cp38-cp38-win_amd64.whl", hash = "sha256:7ca7da9c339ca8890d66958f5462beabd611eca6c958691a8fe6eccbd1eb0c6e"}, - {file = "protobuf-3.19.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:36cecbabbda242915529b8ff364f2263cd4de7c46bbe361418b5ed859677ba58"}, - {file = "protobuf-3.19.4-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c1068287025f8ea025103e37d62ffd63fec8e9e636246b89c341aeda8a67c934"}, - {file = "protobuf-3.19.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96bd766831596d6014ca88d86dc8fe0fb2e428c0b02432fd9db3943202bf8c5e"}, - {file = "protobuf-3.19.4-cp39-cp39-win32.whl", hash = "sha256:84123274d982b9e248a143dadd1b9815049f4477dc783bf84efe6250eb4b836a"}, - {file = "protobuf-3.19.4-cp39-cp39-win_amd64.whl", hash = "sha256:3112b58aac3bac9c8be2b60a9daf6b558ca3f7681c130dcdd788ade7c9ffbdca"}, - {file = "protobuf-3.19.4-py2.py3-none-any.whl", hash = "sha256:8961c3a78ebfcd000920c9060a262f082f29838682b1f7201889300c1fbe0616"}, - {file = "protobuf-3.19.4.tar.gz", hash = "sha256:9df0c10adf3e83015ced42a9a7bd64e13d06c4cf45c340d2c63020ea04499d0a"}, -] -pyaml = [ - {file = "pyaml-20.4.0-py2.py3-none-any.whl", hash = "sha256:67081749a82b72c45e5f7f812ee3a14a03b3f5c25ff36ec3b290514f8c4c4b99"}, - {file = "pyaml-20.4.0.tar.gz", hash = 
"sha256:29a5c2a68660a799103d6949167bd6c7953d031449d08802386372de1db6ad71"}, -] -pyarrow = [ - {file = "pyarrow-6.0.0-cp310-cp310-macosx_10_13_universal2.whl", hash = "sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81"}, - {file = "pyarrow-6.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1"}, - {file = "pyarrow-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2"}, - {file = "pyarrow-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9"}, - {file = "pyarrow-6.0.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44"}, - {file = "pyarrow-6.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f"}, - {file = "pyarrow-6.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e"}, - {file = "pyarrow-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580"}, - {file = "pyarrow-6.0.0-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f"}, - {file = "pyarrow-6.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684"}, - {file = "pyarrow-6.0.0-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd"}, - {file = "pyarrow-6.0.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55"}, - {file = 
"pyarrow-6.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b"}, - {file = "pyarrow-6.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623"}, - {file = "pyarrow-6.0.0-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91"}, - {file = "pyarrow-6.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca"}, - {file = "pyarrow-6.0.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e"}, - {file = "pyarrow-6.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7"}, - {file = "pyarrow-6.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec"}, - {file = "pyarrow-6.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb"}, - {file = "pyarrow-6.0.0-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce"}, - {file = "pyarrow-6.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e"}, - {file = "pyarrow-6.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497"}, - {file = "pyarrow-6.0.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b"}, - {file = "pyarrow-6.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef"}, - {file = 
"pyarrow-6.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea"}, - {file = "pyarrow-6.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247"}, - {file = "pyarrow-6.0.0-cp39-cp39-macosx_10_13_universal2.whl", hash = "sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4"}, - {file = "pyarrow-6.0.0-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04"}, - {file = "pyarrow-6.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f"}, - {file = "pyarrow-6.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae"}, - {file = "pyarrow-6.0.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a"}, - {file = "pyarrow-6.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7"}, - {file = "pyarrow-6.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db"}, - {file = "pyarrow-6.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958"}, - {file = "pyarrow-6.0.0.tar.gz", hash = "sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba"}, -] -pyasn1 = [ - {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, - {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, -] -pyasn1-modules = [ - {file = "pyasn1-modules-0.2.8.tar.gz", hash = 
"sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, - {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, -] -pydata-google-auth = [ - {file = "pydata-google-auth-1.4.0.tar.gz", hash = "sha256:fe7d97f3392f5f4e4026bb3d6b2f77e5988c7b706c022235f34547214c8d8dba"}, - {file = "pydata_google_auth-1.4.0-py2.py3-none-any.whl", hash = "sha256:c0a1e7407adac70d1dbf4f95cc427934556c7c0c41c795ca6f9b969dbb082b2e"}, -] -pyparsing = [ - {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"}, - {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"}, -] -python-dateutil = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] -python-slugify = [ - {file = "python-slugify-6.1.1.tar.gz", hash = "sha256:00003397f4e31414e922ce567b3a4da28cf1436a53d332c9aeeb51c7d8c469fd"}, - {file = "python_slugify-6.1.1-py2.py3-none-any.whl", hash = "sha256:8c0016b2d74503eb64761821612d58fcfc729493634b1eb0575d8f5b4aa1fbcf"}, -] -pytz = [ - {file = "pytz-2021.3-py2.py3-none-any.whl", hash = "sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c"}, - {file = "pytz-2021.3.tar.gz", hash = "sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326"}, -] -pyyaml = [ - {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, - {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, - {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, - {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, - {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, - {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, - {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, - {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, - {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, - {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, - {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, - {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, - {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, - {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, - {file = 
"PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, - {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, - {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, - {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, - {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, - {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, - {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, - {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, - {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, -] -requests = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = 
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, -] -requests-oauthlib = [ - {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, - {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, -] -rsa = [ - {file = "rsa-4.8-py3-none-any.whl", hash = "sha256:95c5d300c4e879ee69708c428ba566c59478fd653cc3a22243eeb8ed846950bb"}, - {file = "rsa-4.8.tar.gz", hash = "sha256:5c6bd9dc7a543b7fe4304a631f8a8a3b674e2bbfc49c2ae96200cdbe55df6b17"}, -] -"ruamel.yaml" = [ - {file = "ruamel.yaml-0.17.10-py3-none-any.whl", hash = "sha256:ffb9b703853e9e8b7861606dfdab1026cf02505bade0653d1880f4b2db47f815"}, - {file = "ruamel.yaml-0.17.10.tar.gz", hash = "sha256:106bc8d6dc6a0ff7c9196a47570432036f41d556b779c6b4e618085f57e39e67"}, -] -"ruamel.yaml.clib" = [ - {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6e7be2c5bcb297f5b82fee9c665eb2eb7001d1050deaba8471842979293a80b0"}, - {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:066f886bc90cc2ce44df8b5f7acfc6a7e2b2e672713f027136464492b0c34d7c"}, - {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:221eca6f35076c6ae472a531afa1c223b9c29377e62936f61bc8e6e8bdc5f9e7"}, - {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win32.whl", hash = "sha256:1070ba9dd7f9370d0513d649420c3b362ac2d687fe78c6e888f5b12bf8bc7bee"}, - {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:77df077d32921ad46f34816a9a16e6356d8100374579bc35e15bab5d4e9377de"}, - {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:cfdb9389d888c5b74af297e51ce357b800dd844898af9d4a547ffc143fa56751"}, - {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-manylinux1_x86_64.whl", hash = 
"sha256:7b2927e92feb51d830f531de4ccb11b320255ee95e791022555971c466af4527"}, - {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-win32.whl", hash = "sha256:ada3f400d9923a190ea8b59c8f60680c4ef8a4b0dfae134d2f2ff68429adfab5"}, - {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-win_amd64.whl", hash = "sha256:de9c6b8a1ba52919ae919f3ae96abb72b994dd0350226e28f3686cb4f142165c"}, - {file = "ruamel.yaml.clib-0.2.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d67f273097c368265a7b81e152e07fb90ed395df6e552b9fa858c6d2c9f42502"}, - {file = "ruamel.yaml.clib-0.2.6-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:72a2b8b2ff0a627496aad76f37a652bcef400fd861721744201ef1b45199ab78"}, - {file = "ruamel.yaml.clib-0.2.6-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:d3c620a54748a3d4cf0bcfe623e388407c8e85a4b06b8188e126302bcab93ea8"}, - {file = "ruamel.yaml.clib-0.2.6-cp36-cp36m-win32.whl", hash = "sha256:9efef4aab5353387b07f6b22ace0867032b900d8e91674b5d8ea9150db5cae94"}, - {file = "ruamel.yaml.clib-0.2.6-cp36-cp36m-win_amd64.whl", hash = "sha256:846fc8336443106fe23f9b6d6b8c14a53d38cef9a375149d61f99d78782ea468"}, - {file = "ruamel.yaml.clib-0.2.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0847201b767447fc33b9c235780d3aa90357d20dd6108b92be544427bea197dd"}, - {file = "ruamel.yaml.clib-0.2.6-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:78988ed190206672da0f5d50c61afef8f67daa718d614377dcd5e3ed85ab4a99"}, - {file = "ruamel.yaml.clib-0.2.6-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:210c8fcfeff90514b7133010bf14e3bad652c8efde6b20e00c43854bf94fa5a6"}, - {file = "ruamel.yaml.clib-0.2.6-cp37-cp37m-win32.whl", hash = "sha256:a49e0161897901d1ac9c4a79984b8410f450565bbad64dbfcbf76152743a0cdb"}, - {file = "ruamel.yaml.clib-0.2.6-cp37-cp37m-win_amd64.whl", hash = "sha256:bf75d28fa071645c529b5474a550a44686821decebdd00e21127ef1fd566eabe"}, - {file = "ruamel.yaml.clib-0.2.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:a32f8d81ea0c6173ab1b3da956869114cae53ba1e9f72374032e33ba3118c233"}, - {file = "ruamel.yaml.clib-0.2.6-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7f7ecb53ae6848f959db6ae93bdff1740e651809780822270eab111500842a84"}, - {file = "ruamel.yaml.clib-0.2.6-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:61bc5e5ca632d95925907c569daa559ea194a4d16084ba86084be98ab1cec1c6"}, - {file = "ruamel.yaml.clib-0.2.6-cp38-cp38-win32.whl", hash = "sha256:89221ec6d6026f8ae859c09b9718799fea22c0e8da8b766b0b2c9a9ba2db326b"}, - {file = "ruamel.yaml.clib-0.2.6-cp38-cp38-win_amd64.whl", hash = "sha256:31ea73e564a7b5fbbe8188ab8b334393e06d997914a4e184975348f204790277"}, - {file = "ruamel.yaml.clib-0.2.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc6a613d6c74eef5a14a214d433d06291526145431c3b964f5e16529b1842bed"}, - {file = "ruamel.yaml.clib-0.2.6-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:1866cf2c284a03b9524a5cc00daca56d80057c5ce3cdc86a52020f4c720856f0"}, - {file = "ruamel.yaml.clib-0.2.6-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:1b4139a6ffbca8ef60fdaf9b33dec05143ba746a6f0ae0f9d11d38239211d335"}, - {file = "ruamel.yaml.clib-0.2.6-cp39-cp39-win32.whl", hash = "sha256:3fb9575a5acd13031c57a62cc7823e5d2ff8bc3835ba4d94b921b4e6ee664104"}, - {file = "ruamel.yaml.clib-0.2.6-cp39-cp39-win_amd64.whl", hash = "sha256:825d5fccef6da42f3c8eccd4281af399f21c02b32d98e113dbc631ea6a6ecbc7"}, - {file = "ruamel.yaml.clib-0.2.6.tar.gz", hash = "sha256:4ff604ce439abb20794f05613c374759ce10e3595d1867764dd1ae675b85acbd"}, -] -setuptools = [ - {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"}, - {file = "setuptools-65.5.1.tar.gz", hash = "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"}, -] -shapely = [ - {file = "Shapely-1.8.5.post1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d048f93e42ba578b82758c15d8ae037d08e69d91d9872bca5a1895b118f4e2b0"}, - {file = 
"Shapely-1.8.5.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99ab0ddc05e44acabdbe657c599fdb9b2d82e86c5493bdae216c0c4018a82dee"}, - {file = "Shapely-1.8.5.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99a2f0da0109e81e0c101a2b4cd8412f73f5f299e7b5b2deaf64cd2a100ac118"}, - {file = "Shapely-1.8.5.post1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6fe855e7d45685926b6ba00aaeb5eba5862611f7465775dacd527e081a8ced6d"}, - {file = "Shapely-1.8.5.post1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec14ceca36f67cb48b34d02d7f65a9acae15cd72b48e303531893ba4a960f3ea"}, - {file = "Shapely-1.8.5.post1-cp310-cp310-win32.whl", hash = "sha256:21776184516a16bf82a0c3d6d6a312b3cd15a4cabafc61ee01cf2714a82e8396"}, - {file = "Shapely-1.8.5.post1-cp310-cp310-win_amd64.whl", hash = "sha256:a354199219c8d836f280b88f2c5102c81bb044ccea45bd361dc38a79f3873714"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:783bad5f48e2708a0e2f695a34ed382e4162c795cb2f0368b39528ac1d6db7ed"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a23ef3882d6aa203dd3623a3d55d698f59bfbd9f8a3bfed52c2da05a7f0f8640"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ab38f7b5196ace05725e407cb8cab9ff66edb8e6f7bb36a398e8f73f52a7aaa2"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8d086591f744be483b34628b391d741e46f2645fe37594319e0a673cc2c26bcf"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4728666fff8cccc65a07448cae72c75a8773fea061c3f4f139c44adc429b18c3"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-win32.whl", hash = "sha256:84010db15eb364a52b74ea8804ef92a6a930dfc1981d17a369444b6ddec66efd"}, - {file = "Shapely-1.8.5.post1-cp311-cp311-win_amd64.whl", hash = "sha256:48dcfffb9e225c0481120f4bdf622131c8c95f342b00b158cdbe220edbbe20b6"}, 
- {file = "Shapely-1.8.5.post1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:2fd15397638df291c427a53d641d3e6fd60458128029c8c4f487190473a69a91"}, - {file = "Shapely-1.8.5.post1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a74631e511153366c6dbe3229fa93f877e3c87ea8369cd00f1d38c76b0ed9ace"}, - {file = "Shapely-1.8.5.post1-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:66bdac74fbd1d3458fa787191a90fa0ae610f09e2a5ec398c36f968cc0ed743f"}, - {file = "Shapely-1.8.5.post1-cp36-cp36m-win32.whl", hash = "sha256:6d388c0c1bd878ed1af4583695690aa52234b02ed35f93a1c8486ff52a555838"}, - {file = "Shapely-1.8.5.post1-cp36-cp36m-win_amd64.whl", hash = "sha256:be9423d5a3577ac2e92c7e758bd8a2b205f5e51a012177a590bc46fc51eb4834"}, - {file = "Shapely-1.8.5.post1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5d7f85c2d35d39ff53c9216bc76b7641c52326f7e09aaad1789a3611a0f812f2"}, - {file = "Shapely-1.8.5.post1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:adcf8a11b98af9375e32bff91de184f33a68dc48b9cb9becad4f132fa25cfa3c"}, - {file = "Shapely-1.8.5.post1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:753ed0e21ab108bd4282405b9b659f2e985e8502b1a72b978eaa51d3496dee19"}, - {file = "Shapely-1.8.5.post1-cp37-cp37m-win32.whl", hash = "sha256:65b21243d8f6bcd421210daf1fabb9de84de2c04353c5b026173b88d17c1a581"}, - {file = "Shapely-1.8.5.post1-cp37-cp37m-win_amd64.whl", hash = "sha256:370b574c78dc5af3a198a6da5d9b3d7c04654bd2ef7e80e80a3a0992dfb2d9cd"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:532a55ee2a6c52d23d6f7d1567c8f0473635f3b270262c44e1b0c88096827e22"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3480657460e939f45a7d359ef0e172a081f249312557fe9aa78c4fd3a362d993"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b65f5d530ba91e49ffc7c589255e878d2506a8b96ffce69d3b7c4500a9a9eaf8"}, - 
{file = "Shapely-1.8.5.post1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:147066da0be41b147a61f8eb805dea3b13709dbc873a431ccd7306e24d712bc0"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c2822111ddc5bcfb116e6c663e403579d0fe3f147d2a97426011a191c43a7458"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-win32.whl", hash = "sha256:2e0a8c2e55f1be1312b51c92b06462ea89e6bb703fab4b114e7a846d941cfc40"}, - {file = "Shapely-1.8.5.post1-cp38-cp38-win_amd64.whl", hash = "sha256:0d885cb0cf670c1c834df3f371de8726efdf711f18e2a75da5cfa82843a7ab65"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0b4ee3132ee90f07d63db3aea316c4c065ed7a26231458dda0874414a09d6ba3"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:02dd5d7dc6e46515d88874134dc8fcdc65826bca93c3eecee59d1910c42c1b17"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c6a9a4a31cd6e86d0fbe8473ceed83d4fe760b19d949fb557ef668defafea0f6"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:38f0fbbcb8ca20c16451c966c1f527cc43968e121c8a048af19ed3e339a921cd"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:78fb9d929b8ee15cfd424b6c10879ce1907f24e05fb83310fc47d2cd27088e40"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-win32.whl", hash = "sha256:8e59817b0fe63d34baedaabba8c393c0090f061917d18fc0bcc2f621937a8f73"}, - {file = "Shapely-1.8.5.post1-cp39-cp39-win_amd64.whl", hash = "sha256:e9c30b311de2513555ab02464ebb76115d242842b29c412f5a9aa0cac57be9f6"}, - {file = "Shapely-1.8.5.post1.tar.gz", hash = "sha256:ef3be705c3eac282a28058e6c6e5503419b250f482320df2172abcbea642c831"}, -] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = 
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -text-unidecode = [ - {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, - {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, -] -toml = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] -tomlkit = [ - {file = "tomlkit-0.7.0-py2.py3-none-any.whl", hash = "sha256:6babbd33b17d5c9691896b0e68159215a9387ebfa938aa3ac42f4a4beeb2b831"}, - {file = "tomlkit-0.7.0.tar.gz", hash = "sha256:ac57f29693fab3e309ea789252fcce3061e19110085aa31af5446ca749325618"}, -] -tqdm = [ - {file = "tqdm-4.50.2-py2.py3-none-any.whl", hash = "sha256:43ca183da3367578ebf2f1c2e3111d51ea161ed1dc4e6345b86e27c2a93beff7"}, - {file = "tqdm-4.50.2.tar.gz", hash = "sha256:69dfa6714dee976e2425a9aab84b622675b7b1742873041e3db8a8e86132a4af"}, -] -typing-extensions = [ - {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, - {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, -] -urllib3 = [ - {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, - {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, -] -win32-setctime = [ - {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, - {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, -] -zipp = [ - {file = 
"zipp-3.7.0-py3-none-any.whl", hash = "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"}, - {file = "zipp-3.7.0.tar.gz", hash = "sha256:9f50f446828eb9d45b267433fd3e9da8d801f614129124863f9c51ebceafb87d"}, -] +lock-version = "2.0" +python-versions = ">=3.8.1,<4" +content-hash = "e142b4f345a0abf7efa67ac2af34ccaffea6577a17e4ac6ef7bb8826e02e297d" diff --git a/python-package/pyproject.toml b/python-package/pyproject.toml index ae16bfb98..05da42635 100644 --- a/python-package/pyproject.toml +++ b/python-package/pyproject.toml @@ -1,44 +1,55 @@ [tool.poetry] +name = "basedosdados" +version = "2.0.0-beta.16" +description = "Organizar e facilitar o acesso a dados brasileiros através de tabelas públicas no BigQuery." authors = ["Joao Carabetta ", "Ricardo Dahis", "Diego Oliveira"] classifiers = [ - "Topic :: Software Development :: Build Tools", - "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Software Development :: Build Tools", + "Topic :: Software Development :: Libraries :: Python Modules", ] -description = "Organizar e facilitar o acesso a dados brasileiros através de tabelas públicas no BigQuery." 
-homepage = "https://github.com/base-dos-dados/bases" +homepage = "https://github.com/basedosdados/mais" +repository = "https://github.com/basedosdados/mais" license = "MIT" -name = "basedosdados" +readme = "README.md" packages = [ - {include = "basedosdados"}, + {include = "basedosdados"}, ] -readme = "README.md" -repository = "https://github.com/base-dos-dados/bases" -version = "1.6.11" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4" +google-api-python-client = "^2.86" +google-cloud-bigquery = "^3.10" +google-cloud-bigquery-connection = "^1.12" +google-cloud-bigquery-storage = "^2.19" +google-cloud-storage = "^2.9" +gql = { version = "^3.4", optional = true } +loguru = "0.7.0" +pandas = "^2.0" +pandas-gbq = "^0.19" +pandavro = { version = "^1.7", optional = true } +pydata-google-auth = "^1.8" +requests-toolbelt = { version = "^1", optional = true } +tomlkit = "^0.11" +tqdm = "^4" +click = { version = ">=8.1,<9", optional = true } + +[tool.poetry.extras] +all = ["click", "gql", "pandavro", "requests-toolbelt"] +avro = ["pandavro"] +cli = ["click"] +upload = ["gql", "requests-toolbelt"] + +[tool.poetry.group.dev.dependencies] +black = "^23.7.0" +flake8 = "^6.1.0" +isort = "^5.12.0" +pre-commit = "^3.3.3" +semgrep = "^1.36.0" +taskipy = "^1.12.0" [tool.poetry.scripts] basedosdados = 'basedosdados.cli.cli:cli' -[tool.poetry.dependencies] -Jinja2 = "3.0.3" -ckanapi = "4.6" -click = "8.0.3" -google-cloud-bigquery = "2.30.1" -google-cloud-bigquery-storage = "1.1.0" -google-cloud-storage = "1.42.3" -importlib-metadata = "^4.11.3" -loguru = "^0.6.0" -pandas = "^1.3.5" -pandas-gbq = "^0.17.4" -pandavro = "^1.6.0" -pyaml = "20.4.0" -pyarrow = "6.0.0" -python = ">=3.7.1,<3.11" -'ruamel.yaml' = "0.17.10" -toml = "^0.10.2" -tomlkit = "0.7.0" -tqdm = "4.50.2" -shapely = "^1.6.0" - [tool.black] # Use the more relaxed max line length permitted in PEP8. 
exclude = ''' @@ -54,11 +65,17 @@ exclude = ''' )/ ''' line-length = 88 -target-version = ["py36", "py37", "py38"] +target-version = ["py38", "py39", "py310"] + +[tool.isort] +profile = "black" + +[tool.taskipy.tasks] +lint = "semgrep scan --error --config auto --exclude-rule yaml.github-actions.security.third-party-action-not-pinned-to-commit-sha.third-party-action-not-pinned-to-commit-sha && black . && isort . && flake8 ." [build-system] -build-backend = "poetry.masonry.api" -requires = ["poetry>=0.12"] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" [pytest] -addopts = "-p no:warnings" +addopts = "-p no:warnings" \ No newline at end of file diff --git a/python-package/requirements-dev.txt b/python-package/requirements-dev.txt deleted file mode 100644 index 1d15110d0..000000000 --- a/python-package/requirements-dev.txt +++ /dev/null @@ -1,60 +0,0 @@ -cachetools==4.2.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -certifi==2021.10.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -charset-normalizer==2.0.12 ; python_full_version >= "3.7.1" and python_version < "3.11" -ckanapi==4.6 ; python_full_version >= "3.7.1" and python_version < "3.11" -click==8.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -colorama==0.4.4 ; python_full_version >= "3.7.1" and python_version < "3.11" and platform_system == "Windows" or python_full_version >= "3.7.1" and python_version < "3.11" and sys_platform == "win32" -db-dtypes==1.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -docopt==0.6.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -fastavro==1.4.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-api-core==1.31.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-api-core[grpc]==1.31.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-auth-oauthlib==0.5.1 ; python_full_version >= "3.7.1" and python_version < "3.11" 
-google-auth==1.35.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-cloud-bigquery-storage==1.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-cloud-bigquery==2.30.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-cloud-core==2.2.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-cloud-storage==1.42.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-crc32c==1.3.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -google-resumable-media==2.3.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -googleapis-common-protos==1.55.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -grpcio==1.44.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -idna==3.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -importlib-metadata==4.11.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -jinja2==3.0.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -loguru==0.6.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -markupsafe==2.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -numpy==1.21.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -oauthlib==3.2.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -packaging==21.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas-gbq==0.17.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandas==1.3.5 ; python_full_version >= "3.7.1" and python_version < "3.11" -pandavro==1.6.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -proto-plus==1.20.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -protobuf==3.19.4 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyaml==20.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyarrow==6.0.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1-modules==0.2.8 ; 
python_full_version >= "3.7.1" and python_version < "3.11" -pyasn1==0.4.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -pydata-google-auth==1.4.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyparsing==3.0.7 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-dateutil==2.8.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -python-slugify==6.1.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -pytz==2021.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -pyyaml==6.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests-oauthlib==1.3.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -requests==2.27.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -rsa==4.8 ; python_full_version >= "3.7.1" and python_version < "3.11" -ruamel-yaml-clib==0.2.6 ; platform_python_implementation == "CPython" and python_version < "3.10" and python_full_version >= "3.7.1" -ruamel-yaml==0.17.10 ; python_full_version >= "3.7.1" and python_version < "3.11" -setuptools==65.5.1 ; python_full_version >= "3.7.1" and python_version < "3.11" -shapely==1.8.5.post1 ; python_full_version >= "3.7.1" and python_version < "3.11" -six==1.16.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -text-unidecode==1.3 ; python_full_version >= "3.7.1" and python_version < "3.11" -toml==0.10.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -tomlkit==0.7.0 ; python_full_version >= "3.7.1" and python_version < "3.11" -tqdm==4.50.2 ; python_full_version >= "3.7.1" and python_version < "3.11" -typing-extensions==4.1.1 ; python_full_version >= "3.7.1" and python_version < "3.8" -urllib3==1.26.9 ; python_full_version >= "3.7.1" and python_version < "3.11" -win32-setctime==1.1.0 ; python_full_version >= "3.7.1" and python_version < "3.11" and sys_platform == "win32" -zipp==3.7.0 ; python_full_version >= "3.7.1" and python_version < "3.11" diff --git 
a/python-package/setup.py b/python-package/setup.py index b6b01ba6a..577dbf53e 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -1,12 +1,12 @@ -''' +""" Setup script for the package. -''' +""" -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name="basedosdados", - version="1.6.11", + version="2.0.0-beta.15", packages=find_packages(), include_package_data=True, install_requires=[ diff --git a/python-package/tests/conftest.py b/python-package/tests/conftest.py index 19ab4e2f9..ea76a3f11 100644 --- a/python-package/tests/conftest.py +++ b/python-package/tests/conftest.py @@ -1,14 +1,18 @@ """ Share fixtures for tests. """ + import shutil import sys + # pylint: disable=invalid-name, protected-access from pathlib import Path import pytest import ruamel.yaml as ryaml -from basedosdados import Metadata, Storage, Dataset, Table + +from basedosdados import Metadata # TODO: deprecate +from basedosdados import Dataset, Storage, Table from basedosdados.upload.base import Base DATASET_ID = "pytest" @@ -22,7 +26,10 @@ def fixture_testdir(): """ Fixture that returns a temporary directory for the metadata files. """ - (Path(__file__).parent / "tmp_bases").mkdir(exist_ok=True) + try: + (Path(__file__).parent / "tmp_bases").mkdir(exist_ok=False) + except Exception: + pass return Path(__file__).parent / "tmp_bases" @@ -262,7 +269,8 @@ def fixture_base(): """ Fixture for the base class """ - return Base() + config_path = Path.home() / ".basedosdados_teste" + return Base(config_path=config_path) @pytest.fixture(name="config_file_exists") @@ -300,9 +308,9 @@ def fixture_data_avro_path(sample_data): @pytest.fixture(name="table") def fixture_table(testdir): - ''' + """ Fixture for table object. 
- ''' + """ t = Table(dataset_id=DATASET_ID, table_id=TABLE_ID, metadata_path=testdir) t._refresh_templates() return t @@ -310,17 +318,17 @@ def fixture_table(testdir): @pytest.fixture(name="folder") def fixture_folder(testdir): - ''' + """ Fixture for folder object. - ''' + """ return testdir / DATASET_ID / TABLE_ID @pytest.fixture(name="python_path") def fixture_python_path(): - ''' + """ Fixture for python_path - ''' + """ python_path = sys.executable if "python" not in python_path: @@ -331,8 +339,142 @@ def fixture_python_path(): @pytest.fixture(name="default_metadata_path") def fixture_default_matadata_path(): - ''' + """ Fixture for default_metadata_path - ''' + """ mt = Metadata(dataset_id=DATASET_ID, table_id=TABLE_ID) return mt.metadata_path + + +############################################################ +# Conftest for new api +############################################################ + +API_DATASET_ID = "dados_mestres" +API_TABLE_ID = "bairro" +API_NEW_DATASET_ID = "new_dataset" +API_NEW_TABLE_ID = "new_table" +API_PUBLISH_DATASET_ID = "br_ipea_teste_avs" +API_PUBLISH_TABLE_ID = "municipios" + + +@pytest.fixture(name="api_dataset") +def fixture_api_dataset(testdir): + """ + Fixture for the dataset class + """ + config_path = Path.home() / ".basedosdados_teste" + return Dataset( + dataset_id=API_DATASET_ID, metadata_path=testdir, config_path=config_path + ) + + +@pytest.fixture(name="api_dataset_metadata_path") +def fixture_api_dataset_metadata_path(testdir): + """ + Fixture that returns the path to the dataset metadata file. + """ + return Path(testdir) / API_DATASET_ID + + +@pytest.fixture(name="api_table_metadata_path") +def fixture_api_table_metadata_path(testdir): + """ + Fixture that returns the path to the table metadata file. + """ + return Path(testdir) / API_DATASET_ID / API_TABLE_ID + + +@pytest.fixture(name="api_dataset_metadata") +def fixture_api_dataset_metadata(testdir): + """ + Fixture that returns a `Metadata` object for the dataset. 
+ """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_DATASET_ID, metadata_path=testdir, config_path=config_path + ) + + +@pytest.fixture(name="api_new_dataset_metadata") +def fixture_new_dataset_metadata(testdir): + """ + Fixture that returns a `Metadata` object for a new dataset. + """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_NEW_DATASET_ID, metadata_path=testdir, config_path=config_path + ) + + +@pytest.fixture(name="api_table_metadata") +def fixture_api_table_metadata(testdir): + """ + Fixture that returns a `Metadata` object for the table. + """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_DATASET_ID, + table_id=API_TABLE_ID, + metadata_path=testdir, + config_path=config_path, + ) + + +@pytest.fixture(name="api_new_table_metadata") +def fixture_new_table_metadata(testdir): + """ + Fixture that returns a `Metadata` object for a new table. + """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_NEW_DATASET_ID, + table_id=API_NEW_TABLE_ID, + metadata_path=testdir, + config_path=config_path, + ) + + +@pytest.fixture(name="api_outdated_dataset_metadata") +def fixture_outdated_dataset_metadata(testdir): + """ + Fixture that returns an outdated dataset `Metadata` object for a new dataset. + """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_OUTDATED_DATASET_ID, # noqa + metadata_path=testdir, + config_path=config_path, + ) + + +@pytest.fixture(name="api_outdated_table_metadata") +def fixture_outdated_table_metadata(testdir): + """ + Fixture that returns an outdated dataset `Metadata` object for a new table. 
+ """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_OUTDATED_DATASET_ID, # noqa + table_id=API_OUTDATED_TABLE_ID, # noqa + metadata_path=testdir, + config_path=config_path, + ) + + +@pytest.fixture(name="api_ipea_table_metadata") +def fixture_ipea_table_metadata(testdir): + """ + Fixture to test metadata created and filled by the user. + Args: + testdir (str): Path to the test directory. + Returns: + Metadata: Metadata object for the table. + """ + config_path = Path.home() / ".basedosdados_teste" + return Metadata( + dataset_id=API_PUBLISH_DATASET_ID, + table_id=API_PUBLISH_TABLE_ID, + metadata_path=testdir, + config_path=config_path, + ) diff --git a/python-package/tests/sample_data/config.toml b/python-package/tests/sample_data/config.toml index 07111b707..f3e0a8cea 100644 --- a/python-package/tests/sample_data/config.toml +++ b/python-package/tests/sample_data/config.toml @@ -1,21 +1,16 @@ -# Where to save all metadata data, such as Dataset and Table config files. -# We reccomend this path to be the same as the github repo clone -metadata_path = "~/bases" - # What is the bucket that you are saving all the data? It should be # an unique name. 
-bucket_name = "basedosdados" - -# Where the templates to generate configs and descriptions sit -templates_path = "~/.basedosdados/templates" +bucket_name = "basedosdados-dev" [gcloud-projects] -staging = "basedosdados-staging" -prod = "basedosdados" -# Data about you to auto-fill config -# TODO: issue 20 -[user] -name = "" -email = "" -website = "" \ No newline at end of file +[gcloud-projects.staging] +credentials_path = "~/.basedosdados/credentials/staging.json" +name = "basedosdados-dev" + +[gcloud-projects.prod] +credentials_path = "~/.basedosdados/credentials/prod.json" +name = "basedosdados-dev" + +[api] +url = "https://staging.api.basedosdados.org/api/v1/graphql" diff --git a/python-package/tests/sample_data/table/arquitetura_municipio.xlsx b/python-package/tests/sample_data/table/arquitetura_municipio.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2433c261e8d71d0484a0eaf3209bcddbab98e480 GIT binary patch literal 9457 zcmd^F1yo$klE#_9-~=bQYj7uMa0wC!?h-t>y9Fn>1qdD>xDOsAVSo@IxCeI_EDQ_D zdz<{({r`RE?b|(j&eoifTXVa*@7Le0uBzKA^6&__FaQ7m2G`g`1LhYYxc%*J!)EGa z$l4kq>Zo&^6|*o)O?k;OdVkv7;~2*b&Fa4x;DJ`i{!+ zSb4!du>Bxs8p|iD`m-mS$$oFWA0+{eW_A z&W>2t6KRpDUl<@j_VU!gOiRCK`{N!0fTQ2ErjI0|J|bQ+;EkVar3kLn7#@R8>{(Aj zab>CK*$f&A#up7X=*i~g)@rv+Q#zX+Sh>4qU~^#StFa@0yN8$WWy8(&MUk&(^)qFA z)}p5`Go~&!Ma|2`MBBbK;FIBh5Y^l_uQ?qs56l?9*lanygj`HbZ7t93cs6_1H)hS* z_#T^AwPsiFin1@^t@eL)mbRo}WPao~Lh>?pr+jiD6c}$(A%8Qr*lU5xtM~Xs5_%dImC& zC&$*v_U-+q6+Butm(MQ7y{}_!4X&(2GIKx%^Fj+lhhw1VmWRWH0es&GlB0h#l7W6t;RbXtVdlvg@TQWX9&Y zVa6cEg3Tav*I>c-*zb6%Z~W;(&a7TxI~$(g?DfHiwTsPFXVH!A>70fDXtPk0d;2rC ziW`B9cAtj%S@a^hlp5Z#v!~mlY!y(EcHQ=w*5sVETEA;QJhHRiwJBG~%ieM8t9`rb z;T0w2ign&IgQA>z1JU^#TBYoydg6=o)J9^QGZP-<8Rx^chgQ6_KKx34)(Z2{0|qgi zkeiaeuvc?&M{DdC5fm=GZ+!XX{3g2G`fBG=AubkezHSN&(VGT2n{?ogFVTbsF^M@L z-{u|~9r1q}z*xl*F#sHX@HqD-0`FDk?iO5lHfMvpt8~4EwCB<0Kf0d{wcATq)+gFl zw2prr$6%vl1B=X}&1tkYSB4vC1c%-C6R8NtVA3ZeqY-|49=pPS)E$`<%QV;>*-rBG 
z^X=cl6_yxGzWfy%7);RPM&lhOFweT1U&0EMx^Fk`x3)M*lg?rkavrGt~T8#f*kY1Rfs1aIG(n!e%p+k`;VhZZc`cV^gTA|a=5kyBR7j-0R78XFu(pj0$r zyD8m}54_WrSCDPhtHXooO63NFSkITpG?lUP=rcF+*U}BP3$}uLki`dHPM>XWq#H~YcKP-o2O=cOZh|o_!mSvw z@-fY%3mdM-6D@ugnIvak?kl_2pqT@cW0HB#mUR2tZwGp2$IzN9&MMnIP`4=8FbG$g zMd4X$-8)O8@vg!UJyc}8!U#NT4%tN6bcIi*3ABk^X>SIo6=|(Fv=ax?6(&rpX%o5A z-VT^5(mwy88a0iiRpUy_8^Bd8wB(RVWJp)|uHsHB9Z**+wC2!AEJ;@wH!Y=AyHjx~ z7FuyACR(K{jG3m>j&Y?G{!r;9Zl)_tnl{spai>)bI4c%D=eSdS95D^0<>X4s9RMnt zTXIMw68=!(&=M)8TJjj>Ed@su-X18YT8bFu!G=W8;?feSraDLmaH3?YE=Ivw;PT~n z5F&!<|E5}v(n5k8dt^uqh=LDrA!o-xfCO|r!qfu5;fNVh0iqBBlE~R{5L^R0R4}zr z;MBwnnE+9U0d?f;_y~}|4ns^WAY7uDAtxXTDPWwOofyG2sKWtM3mtAu%uo;zg&c57 z&Q68^3F`32)WU?r5jT_oL;(WsQ?OGZxCVE`VQOK+sfioP1ETH)a8t0;AwYsVUSn$E z!X=6ussW-<0yHStnGjqRz;Ry)u>~cJEo63p5m^*fod2v6+7l#tv^XsEtwRDMGhP(3I4pb~ zgaKU*620vBmv!>r9OIufPs^gH;WTv({RobmBIFR1Ft_O60siZip!b1n)r)1!=Nzb_ z@4#>tt4u8CPf$Snm?{10A@kp=w@FeEGY9Vf19Xct)J3n7Z2#Q&xeRGQDW zy|FED*%bq-Ucq4A?I*lgjsZ32v+ei)FAislkWo;=%%XS)n3R=U&8hAhItW(zzo7)h z2HR6F3Cf;pVuMnG?P(9;TVx%iK{>(p^qv3eF><+vz6XzdAtVx%FtM=Q0Y+!#R&oBH zSAt#?*z0Y2J-z&z1oA4dHy8kq_!`2?gZmranf`NE0S1pu5ef)OSXgZA0JE}kYdD=< zL;v3?33h-%Sw`MBNP^vl7XSa0>>fl!;P3g>_NM3MQSYvkZw+*R=G5D< zX8POKT}EOswl;Gyw6%KH>&bca&AB>jeaN^fn`Pg(Im3^qZT7~2+pA_?Gb@{UKeqbX zx2b)Y;q>o&*j0>awN>opmtCwm9$MGY_4Jo&RK_mOm2i^$e_ z?Uj><*HhMuJ{Yj>3*IzpL92G5Yd7$Bsq2^1hb=DXFfe%)f0epY{ip)`lDZl=nc4iB zz_Q*QM-R)Rbg^S1E#kO`v3n&>goJbLN(b8&JTR0g#ISileKgpZ#ocNLMndT?_99UDC7*anY>^DMfihT13>px9Qb#APzb>zL_nN?4`Zm71`C ziZw2SMCF!kqvVMXEEEbAb63YFk+F$1X6h<}Jxv*S#zIlsj4;8H4Xx5!P>w`b(nFg% zSqT||-EY#OeF%I-?Esx_>rSx1lhli-j6B7;!=Su)Te0T$3K?%x_&-nIv2M>WwKq|5 zvUhmFW@7JTc2@$ijbD)KX2%W7>2L!y*^j-o`#9p-Z~7D^@P3#Zg|KVqH>!wj2&8^a zQ_3x+dtcB%$tiMNjHwox>-oXEu8|HuU4hUj{E|4UoV|~z{hGEF)qpZgZ=?> zg;~=c3o|O(f~zBIT&_TWI5b)$aWy&LF97|pGBlH;m!CNvkm^dYElH9EW_DC;e_|yY z_4SpjSB_;{QNytct;F^(TNVP~Ja7+2I)!s>-b-j*+Ya*#D$|x;m#vqn_E1cTny>l> zX(RW$L|)po@u`Dw(A&Yxl?ew2WaR~Z(@{7@yMt7GxRM(87S7ebgcRoot(+~*Y|Yrd 
ze||593_aDcpJvB}B3@x6FHFt?U=h?=aCeQy-c(r)(>h{T#^6j9>fKzE&|$7mXfh?; zmOuIy_@Url5p$1g-uy+_7h+EJf#4)GSaYKXSjle}D|1?gIRU|v%;H=Y?ua?>{3o}S z@!0akp3d|!Yg-`HMKVau#Q#mH#rXjX6|YT0J`Z9Vy_;CC z935Us4R+56>jQFX6E3AvmK48yg$oB4%F*kXW6X@Gnf}$*TNGB8et0uX08C7a07NDj1cKR@DMba2aQj zRGk*z%)+p+6Y1RRJ%}8Cn1cnW@Z$c;GG;{@_?9$a|I&R6&=dTr%{%YBY@|n0Nw!u^ zTzFJ&-5i8L`b_E;*(K>4B*W=r28A>(i+1#d#!NbGT6WPJd2@7`$5^nXo9bi<%u@3JZoT;6 zQ*V(>YqvOQcCT1no$Ho>Y6wuMlym)JP$4|>e$;qDVW@WQ&b{PE{+4+U7<1pkUyJg} zcZSST#%r>-sbAz^Mex6-R;#ls45$vxSV4mH(eO5i>aV`FOK>DvQHhaBFL_oL1*`yJ z=_(!NQ?@DRKX!Qi7z$tV=psN3iJekBJD(8C4%ZE*7wn=z)*CRH?fY?rQp)A=L$+g- ziara0P_lHZNv7xY!C|Iu5fxX}Pu4$?Sdn*but0nj=MC4;YL1Zin=d~4g5+DoR7c}`rwyCuB6fxWxc8aZKm8E*@ZO}AeB z&%GD!k6Z19rIC}FshYFXT`~90fz_#t+0C$HdW+pW+V~8oWZEJ|gisw=XoX5fbP@+) zeA^%0>MvQUI~bm$b`qB;x>zPMQ81@Zjc;jm(m%BZ468A9J^uXJIy!q`N{T};hZd#W zHPog~1|;Oo*CL0D{0_<5!ZF?^E-~Yq)O(UwHg&BsniRE@R)d})Xa`NtN9EQ=FOUM6 zeUhBEv4EAB5^6=qnWA*Y6*g(=_ zYz3!xN@&{2XBa8?5(nij5;E24?U`Wxp`w~HqiBWEa8X5;Xb8*x=OV4`h9~M4)*e@f zZJzmim$p)!(F%3y=()r5X{>y8wXkh`ji-)?a^9!IF=o!KDa7xw@MYgK6G^eaOPMp* ze;a34b!iu_r4Nd1Nw_M{)iQkQiCc1wb#AXdnUC2%`0^0NG-jEAuxL<}U!@bVxGxG( zU0*+OuVaki1nIMVA@mB1QI+z!06N_VfMUDgAQh1#iEQlgIsQHET`Niv)$=#oF>Ch+ z_ClE>I8KJIF?i!2k;RKWQz*hnO$g6jko{!a&h-3K2Ub_gMsNr{427d{f%tS9yVq_Sf;6W=h3gfd-Rt9RVC#K>f>(| zyfz;UzG)MPk$8JaRjrtEGPy56i-}~-kS~O}fRXcJFWM(*wL$}1_{DbcC_}blA%WwN z0>cSoZ+r5^IJY!dP49^nm$G9YDd>X;4cIL~bRyFW0We+A2mv+G^i-uYZP6Ak5m6>p zMS8ODMmD~8de;0-R+b}2sU2G?eHIJ7V25g;_KLz9-A8)kQaN_taAyfSfEj#( zKrlb$W7Vr#y9${4Tzy<|l!en$hh%kWudr0@=Q+!r@icpg!fN+?P@*ZdyP~-8JYM@x zd<=Z7)@uUhWt1|Ka)40s*wNHhb;?qjeq)=5X=w-?4AP_fw7)x*%ubqO_*5$L*n9d$ zDOUvj7^>sHk;|%lZvo&jW%d!5w z2h>)*R0_^(w9f+E6z~Tgw@$7f){jTPRR^Ps;j%IS6LJDd7j~?fPR2g1cAfD6AFye; z*M%I;(?>zIrkOov!X#L1T@q%ewDy+2l6vN!zSBf(@DL@1mgn(kwmx!=8|diV#addo z7H+6Z!Q=MR_`KnK%{!51u})g&4KieTiE8X~(GQ9n5++m3Xw$Aa-Lvd&RXmj7JHgI3 zW_g1z@E-CWV3@pX7ud702Ty*5ddD@0c!NycP!O(&t?bBI+*;IU7lRSY|vRoI-;DkTg 
z<8XKuxT&jW+@PYgJ)p%a=Zz|$NyJx4O89}?M9h${Y{5~jJ^g?N+YBrU;_5H-ESz#Fo85ZezXq7cMMQU8I#PHy1^-!`!urqNCvLYqtXsQkssRc=H4I+?M?e_?0EK9pCDC?t#y62DTu|3w-e>cv8Y`PB2h2W@kV?;^Hy%wo&F0?s zIdZSLb|&=-!z4aQz}E&7x?_Nly@e8R(*u8S)u#Ujndf*M0m&7q#m^IVxn zQzL%~>$EeVC=Er-3h&8l6g-(DG7qko%pr~shu?vyCEn!9+w+N&oIbfemD+xOzoS(} z1~vPM!Y)l}hJ2T|p`{7hBf7ZJOKHWvWn;UWjtvQF=XM#qQP(&tXU;nN@N)Pk((wXy zu}X}%76z@~;O+puk!)+ccMIrUr10la0_rWGKhXS+U_qRs^(}(Z$|Hi|Q++F@s&WyT ziVUXskzW2KT=gL=(Q&VMntY^kYeSh)^uWVs?wnWMy=+@qYOdH^HW_%h$W#*{s@>0T zT0LNrP!Z){B1|~kh75alH(jVn=>v%`wKZh=i3*=AQyKH%cnwBNqPuX&9I0EUNk^HQ z6A`-Edc|(?!Z+VnBP|bguGLYw=03Xlve6V*AZ|&0NL!e$)H|-Gu57iM*BQw_?h2i@ zWF$POAKBX%=Kz8a5>so|RqQ; zJ2-A1e%qQk8<`q88^LbE;N8IV;!T?oJW3&7D=q$nb?@_LJWs#->}Ub3dzgN7mKuZn ze9a&)7$u||K?6>OY=BXiuea1~+Zohe~XC z3fkk1?vsTevD3wUf1nO^TXdmP2=8lcee+%Yve!;L1$ZxVsra83GiXwZEqTts%8Qe= zG?<@ziZGgc^srXs*^}tZD%99b`jnK<>E3|e)B5hA^T(19iee1HaX){(5h`TkklQ=d`L*9T@h!;UhlZl_t!&IU$2r zWo=Si8&GkEh)^KCq#=M)-dRKa&D0o`3zRyUkf0h42$hqpD6`pIc{^v%6^40%XiYI* zCg&IUW~Ly+mPvTe#D=uzRKzG6h(rJ_!egY!^H`zp77yFHz;^It>GMei$S(=KX$rAy z;Li_hkCI=aDx^Rmnp!?H?wwLm`J}i=!!)kNHMD%;(Usr>>Mf9g+k$i9bCrj$@c7AoEch2&s z?2-OqCHcMKua4!94X+=3AHd&z?(YqMCHo&Ijhg&F^+GE0h)8$C0^Yu`Zx_D-io2tK E105-CDgXcg literal 0 HcmV?d00001 diff --git a/python-package/tests/sample_data/table/publish.sql b/python-package/tests/sample_data/table/publish.sql index 38161e190..8102f9972 100644 --- a/python-package/tests/sample_data/table/publish.sql +++ b/python-package/tests/sample_data/table/publish.sql @@ -19,7 +19,7 @@ TIPOS: */ CREATE VIEW basedosdados-dev.pytest.pytest AS -SELECT +SELECT id_municipio, ano, PIB, diff --git a/python-package/tests/sample_data/table/publish_part.sql b/python-package/tests/sample_data/table/publish_part.sql index a7967ba0b..0aad5d56e 100644 --- a/python-package/tests/sample_data/table/publish_part.sql +++ b/python-package/tests/sample_data/table/publish_part.sql @@ -19,7 +19,7 @@ TIPOS: */ CREATE VIEW basedosdados-dev.pytest.pytest_partitioned AS -SELECT +SELECT 
id_municipio, ano, PIB, diff --git a/python-package/tests/sample_data/table/table_config.yaml b/python-package/tests/sample_data/table/table_config.yaml deleted file mode 100644 index 870b43577..000000000 --- a/python-package/tests/sample_data/table/table_config.yaml +++ /dev/null @@ -1,158 +0,0 @@ -source_bucket_name: basedosdados-dev # AUTO GENERATED -project_id_staging: basedosdados-dev # AUTO GENERATED -project_id_prod: basedosdados-dev # AUTO GENERATED -table_id: pytest # AUTO GENERATED -dataset_id: pytest # AUTO GENERATED - -version: v1.1 - -last_updated: 2021-01-15 # AUTO GENERATED - -# Descreva a tabela. Essas são as primeiras frases que um usuário vai ver. -# Você não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de -# como usar os dados. -# Se souber, liste também aplicações: pesquisa, apps, etc. que usem os dados. -description: | # REQUIRED - Produto Interno Bruto (PIB) municipal a preços correntes. - -# Qual organização/departamento/pessoa tratou os dados? -# As vezes há um ponto intermediário entre os dados originais e subir na Base dos Dados. -# Se essa pessoa é você, preencha abaixo com suas informações. -treated_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: - email: rdahis@basedosdados.org - -# Quem está completando esse arquivo config? -published_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: www.ricardodahis.com - email: rdahis@basedosdados.org - -# Se houve passos de tratamento, limpeza e manipulação de dados, descreva-os aqui. -treatment_description: | - - Mudamos a unidade de medida para R$1. - -# Com qual frequência a base é atualizada? -# Opções: hora | dia | semana | mês | 1 ano | 2 anos | 5 anos | 10 anos | único | recorrente -data_update_frequency: 1 ano - -# Nível da observação: quais colunas identificam uma linha unicamente? 
-# Escolha todas as opções necessárias. -# Regras: minúsculo, sem acento, singular. -# Opções (exemplos): pais, estado, municipio, cidade, hora, dia, semana, mes, ano, etc. -observation_level: - - municipio - - ano - -# Quais colunas identificam uma linha unicamente? -# Preencha com os nomes de colunas. Ex: id_municipio, ano. -# Pode ser vazio pois certas tabelas não possuem identificadores. -primary_keys: - - - -# Qual é a cobertura espacial da tabela? -# Regras: -# - minúsculo, sem acento, singular -# - descer até o menor nível administrativo cuja cobertura abaixo seja 'todos' -# Exemplo 1: tabela que cubra todos os municípios nos estados de SP e GO -# - brasil -# - SP, GO -# Exemplo 2: tabela que cubra países inteiros na América Latina -# - brasil, argentina, peru, equador -coverage_geo: - - brasil - -# Qual é a cobertura temporal (em anos) da tabela? -# Opções: pre-1999, 2000, 2001, ..., 2019, 2020 -coverage_time: - - 2002 - - 2003 - - 2004 - - 2005 - - 2006 - - 2007 - - 2008 - - 2009 - - 2010 - - 2011 - - 2012 - - 2013 - - 2014 - - 2015 - - 2016 - - 2017 - - 2018 - -# Liste as colunas da tabela que representam partições. -# Não esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery. -# Isso poupará muito tempo e dinheiro às pessoas utilizando essa tabela. -# Se não houver partições, não modifique abaixo. -partitions: # REQUIRED - -# Quais são as colunas? Certifique-se de escrever uma boa descrição, as pessoas vão gostar -# para saber sobre o que é a coluna. -# Adicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando -# inicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que -# preencherá automaticamente as colunas. -# Algumas colunas existirão apenas na tabela final, você as construirá em `publish.sql`. -# Para esses, defina is_in_staging como False. -# Além disso, você deve adicionar as colunas de partição aqui e definir is_partition como True. 
-columns: # REQUIRED - - - - name: id_municipio - description: ID Município - IBGE 7 Dígitos - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: ano - description: Ano - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: PIB - description: Produto Interno Bruto a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: impostos_liquidos - description: Impostos, líquidos de subsídios, sobre produtos a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA - description: Valor adicionado bruto a preços correntes total - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_agropecuaria - description: Valor adicionado bruto a preços correntes da agropecuária - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_industria - description: Valor adicionado bruto a preços correntes da indústria - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. 
- - - - name: VA_servicos - description: Valor adicionado bruto a preços correntes dos serviços, exclusive administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_ADESPSS - description: Valor adicionado bruto a preços correntes da administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. diff --git a/python-package/tests/sample_data/table/table_config_part.yaml b/python-package/tests/sample_data/table/table_config_part.yaml deleted file mode 100644 index d8cf6920a..000000000 --- a/python-package/tests/sample_data/table/table_config_part.yaml +++ /dev/null @@ -1,164 +0,0 @@ -source_bucket_name: basedosdados-dev # AUTO GENERATED -project_id_staging: basedosdados-dev # AUTO GENERATED -project_id_prod: basedosdados-dev # AUTO GENERATED -table_id: pytest # AUTO GENERATED -dataset_id: pytest # AUTO GENERATED - -version: v1.1 - -last_updated: 2021-01-15 # AUTO GENERATED - -# Descreva a tabela. Essas são as primeiras frases que um usuário vai ver. -# Você não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de -# como usar os dados. -# Se souber, liste também aplicações: pesquisa, apps, etc. que usem os dados. -description: | # REQUIRED - Produto Interno Bruto (PIB) municipal a preços correntes. - -# Qual organização/departamento/pessoa tratou os dados? -# As vezes há um ponto intermediário entre os dados originais e subir na Base dos Dados. -# Se essa pessoa é você, preencha abaixo com suas informações. 
-treated_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: - email: rdahis@basedosdados.org - -# Quem está completando esse arquivo config? -published_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: www.ricardodahis.com - email: rdahis@basedosdados.org - -# Se houve passos de tratamento, limpeza e manipulação de dados, descreva-os aqui. -treatment_description: | - - Mudamos a unidade de medida para R$1. - -# Com qual frequência a base é atualizada? -# Opções: hora | dia | semana | mês | 1 ano | 2 anos | 5 anos | 10 anos | único | recorrente -data_update_frequency: 1 ano - -# Nível da observação: quais colunas identificam uma linha unicamente? -# Escolha todas as opções necessárias. -# Regras: minúsculo, sem acento, singular. -# Opções (exemplos): pais, estado, municipio, cidade, hora, dia, semana, mes, ano, etc. -observation_level: - - municipio - - ano - -# Quais colunas identificam uma linha unicamente? -# Preencha com os nomes de colunas. Ex: id_municipio, ano. -# Pode ser vazio pois certas tabelas não possuem identificadores. -primary_keys: - - - -# Qual é a cobertura espacial da tabela? -# Regras: -# - minúsculo, sem acento, singular -# - descer até o menor nível administrativo cuja cobertura abaixo seja 'todos' -# Exemplo 1: tabela que cubra todos os municípios nos estados de SP e GO -# - brasil -# - SP, GO -# Exemplo 2: tabela que cubra países inteiros na América Latina -# - brasil, argentina, peru, equador -coverage_geo: - - brasil - -# Qual é a cobertura temporal (em anos) da tabela? -# Opções: pre-1999, 2000, 2001, ..., 2019, 2020 -coverage_time: - - 2002 - - 2003 - - 2004 - - 2005 - - 2006 - - 2007 - - 2008 - - 2009 - - 2010 - - 2011 - - 2012 - - 2013 - - 2014 - - 2015 - - 2016 - - 2017 - - 2018 - -# Liste as colunas da tabela que representam partições. 
-# Não esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery. -# Isso poupará muito tempo e dinheiro às pessoas utilizando essa tabela. -# Se não houver partições, não modifique abaixo. -partitions: keys # REQUIRED - -# Quais são as colunas? Certifique-se de escrever uma boa descrição, as pessoas vão gostar -# para saber sobre o que é a coluna. -# Adicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando -# inicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que -# preencherá automaticamente as colunas. -# Algumas colunas existirão apenas na tabela final, você as construirá em `publish.sql`. -# Para esses, defina is_in_staging como False. -# Além disso, você deve adicionar as colunas de partição aqui e definir is_partition como True. -columns: # REQUIRED - - - - name: id_municipio - description: ID Município - IBGE 7 Dígitos - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: ano - description: Ano - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: PIB - description: Produto Interno Bruto a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: impostos_liquidos - description: Impostos, líquidos de subsídios, sobre produtos a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. 
- - - - name: VA - description: Valor adicionado bruto a preços correntes total - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_agropecuaria - description: Valor adicionado bruto a preços correntes da agropecuária - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_industria - description: Valor adicionado bruto a preços correntes da indústria - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_servicos - description: Valor adicionado bruto a preços correntes dos serviços, exclusive administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_ADESPSS - description: Valor adicionado bruto a preços correntes da administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: keys - description: partition key - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: True # Bool [True, False], whether the column is a partition. 
\ No newline at end of file diff --git a/python-package/tests/sample_data/table/table_config_part_wrong.yaml b/python-package/tests/sample_data/table/table_config_part_wrong.yaml deleted file mode 100644 index 2c08cc584..000000000 --- a/python-package/tests/sample_data/table/table_config_part_wrong.yaml +++ /dev/null @@ -1,170 +0,0 @@ -source_bucket_name: basedosdados-dev # AUTO GENERATED -project_id_staging: basedosdados-dev # AUTO GENERATED -project_id_prod: basedosdados-dev # AUTO GENERATED -table_id: pytest # AUTO GENERATED -dataset_id: pytest # AUTO GENERATED - -version: v1.1 - -last_updated: 2021-01-15 # AUTO GENERATED - -# Descreva a tabela. Essas são as primeiras frases que um usuário vai ver. -# Você não precisa ser muito conciso. Sinta-se a vontade para dar exemplos de -# como usar os dados. -# Se souber, liste também aplicações: pesquisa, apps, etc. que usem os dados. -description: | # REQUIRED - Produto Interno Bruto (PIB) municipal a preços correntes. - -# Qual organização/departamento/pessoa tratou os dados? -# As vezes há um ponto intermediário entre os dados originais e subir na Base dos Dados. -# Se essa pessoa é você, preencha abaixo com suas informações. -treated_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: - email: rdahis@basedosdados.org - -# Quem está completando esse arquivo config? -published_by: - name: Ricardo Dahis - code_url: https://github.com/basedosdados/mais/tree/master/bases/br_ibge_pib/code # REQUIRED - website: www.ricardodahis.com - email: rdahis@basedosdados.org - -# Se houve passos de tratamento, limpeza e manipulação de dados, descreva-os aqui. -treatment_description: | - - Mudamos a unidade de medida para R$1. - -# Com qual frequência a base é atualizada? 
-# Opções: hora | dia | semana | mês | 1 ano | 2 anos | 5 anos | 10 anos | único | recorrente -data_update_frequency: 1 ano - -# Nível da observação: quais colunas identificam uma linha unicamente? -# Escolha todas as opções necessárias. -# Regras: minúsculo, sem acento, singular. -# Opções (exemplos): pais, estado, municipio, cidade, hora, dia, semana, mes, ano, etc. -observation_level: - - municipio - - ano - -# Quais colunas identificam uma linha unicamente? -# Preencha com os nomes de colunas. Ex: id_municipio, ano. -# Pode ser vazio pois certas tabelas não possuem identificadores. -primary_keys: - - - -# Qual é a cobertura espacial da tabela? -# Regras: -# - minúsculo, sem acento, singular -# - descer até o menor nível administrativo cuja cobertura abaixo seja 'todos' -# Exemplo 1: tabela que cubra todos os municípios nos estados de SP e GO -# - brasil -# - SP, GO -# Exemplo 2: tabela que cubra países inteiros na América Latina -# - brasil, argentina, peru, equador -coverage_geo: - - brasil - -# Qual é a cobertura temporal (em anos) da tabela? -# Opções: pre-1999, 2000, 2001, ..., 2019, 2020 -coverage_time: - - 2002 - - 2003 - - 2004 - - 2005 - - 2006 - - 2007 - - 2008 - - 2009 - - 2010 - - 2011 - - 2012 - - 2013 - - 2014 - - 2015 - - 2016 - - 2017 - - 2018 - -# Liste as colunas da tabela que representam partições. -# Não esqueça de deletar essas colunas nas tabelas .csv na hora de subir para o BigQuery. -# Isso poupará muito tempo e dinheiro às pessoas utilizando essa tabela. -# Se não houver partições, não modifique abaixo. -partitions: keys # REQUIRED - -# Quais são as colunas? Certifique-se de escrever uma boa descrição, as pessoas vão gostar -# para saber sobre o que é a coluna. -# Adicionar todas as colunas manualmente pode ser bastante cansativo, por isso, quando -# inicializando este arquivo de configuração, você pode apontar a função para uma amostra de dados que -# preencherá automaticamente as colunas. 
-# Algumas colunas existirão apenas na tabela final, você as construirá em `publish.sql`. -# Para esses, defina is_in_staging como False. -# Além disso, você deve adicionar as colunas de partição aqui e definir is_partition como True. -columns: # REQUIRED - - - - name: id_municipio - description: ID Município - IBGE 7 Dígitos - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: ano - description: Ano - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: PIB - description: Produto Interno Bruto a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: impostos_liquidos - description: Impostos, líquidos de subsídios, sobre produtos a preços correntes - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA - description: Valor adicionado bruto a preços correntes total - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_agropecuaria - description: Valor adicionado bruto a preços correntes da agropecuária - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_industria - description: Valor adicionado bruto a preços correntes da indústria - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. 
- - - - name: VA_servicos - description: Valor adicionado bruto a preços correntes dos serviços, exclusive administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: VA_ADESPSS - description: Valor adicionado bruto a preços correntes da administração, defesa, educação e saúde públicas e seguridade social - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: Wrong_column_name - description: wrong columns description - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: False # Bool [True, False], whether the column is a partition. - - - - name: keys - description: partition key - is_in_staging: True # Bool [True, False], whether the column is in the staging table - is_partition: True # Bool [True, False], whether the column is a partition. 
\ No newline at end of file diff --git a/python-package/tests/test_base.py b/python-package/tests/test_base.py index faebe3a0e..3e79d5112 100644 --- a/python-package/tests/test_base.py +++ b/python-package/tests/test_base.py @@ -1,12 +1,15 @@ """ Tests for Base class """ -# pylint: disable=unused-variable +import os import re +import time from pathlib import Path -from google.cloud import storage, bigquery +from google.cloud import bigquery, storage + +# pylint: disable=unused-variable def test_bucket_name(base, config_file_exists, capsys): @@ -76,6 +79,7 @@ def test_main_vars(base, config_file_exists, capsys): out, err = capsys.readouterr() assert "Apparently, that is the first time that you are using" in out + def test_metadata_path(base, config_file_exists, capsys): """ Test the metadata_path function @@ -107,3 +111,72 @@ def test_uri(base, config_file_exists, capsys): else: out, err = capsys.readouterr() assert "Apparently, that is the first time that you are using" in out + + +############################################ +# NEW API AUTHENTICATION TESTS +# a separate configuration directory is +# used for these tests (.basedosdados_teste) +############################################ +def test_load_no_token(base): + """ + Test get_token function + """ + if base.token_file.exists(): + base.token_file.unlink() + access_token = base.load_token().get("access") + assert not access_token + + +def test_get_new_token(base): + """ + Test get_token function + """ + + username = base.config["user"]["email"] + # Password must be declared as an environment variable + password = os.getenv("API_PASSWORD") + token = base.get_token(username, password) + try: + base.save_token(token) + except Exception as e: + print(e) + + assert base.token_file.exists() + assert base.verify_token(token) is True + + +def test_refresh_token(base): + """ + Test refresh_token function. + TODO: This test still depends on previous one. Should use a mock? 
+ """ + token = base.load_token() + time.sleep(5) # without this, the token is not refreshed + new_token = base.refresh_token(token) + try: + base.save_token(new_token) + except Exception as e: + print(e) + assert base.token_file.exists() + assert token["access"] != new_token["access"] + + +def test_get_dataset_id_from_slug(base): + """ + Test graphql_request function by getting the + id (UUID) from the slug + TODO: This test depends on a mock of the API + """ + dataset_id = base._get_dataset_id_from_slug("dados_mestres") + assert dataset_id == "ba8fb30a-a978-4495-875a-5f268fab4ef5" + + +def test_get_table_id_from_slug(base): + """ + Test graphql_request function by getting the + id (UUID) from the slugs of dataset and table + TODO: This test depends on a mock of the API + """ + table_id = base._get_table_id_from_slug("dados_mestres", "bairro") + assert table_id == "4f536063-9938-4d95-a0d1-4ec25fc1923b" diff --git a/python-package/tests/test_cli.py b/python-package/tests/test_cli.py index ba5c9e79c..779a2ab7a 100644 --- a/python-package/tests/test_cli.py +++ b/python-package/tests/test_cli.py @@ -1,22 +1,23 @@ """ Tests for the CLI module. """ -# pylint: disable=consider-using-with -from pathlib import Path + import os import subprocess +# pylint: disable=consider-using-with +from pathlib import Path + import pytest -import basedosdados as bd from google.api_core.exceptions import NotFound +import basedosdados as bd DATASET_ID = "pytest" TABLE_ID = "pytest" -TABLE_FILES = ["publish.sql", "table_config.yaml"] +TABLE_FILES = ["publish.sql"] -METADATA_FILES = {"dataset": "dataset_config.yaml", "table": "table_config.yaml"} cli_dir = Path(__file__).parent / ".." 
/ "basedosdados" / "cli" @@ -48,79 +49,6 @@ def _run_command(command, output="err"): raise ValueError("output must be either err or out") -def _table_exists(table: bd.Table, mode: str): - """ - Check if table exists in BigQuery - """ - - try: - table.client[f"bigquery_{mode}"].get_table(table.table_full_name[mode]) - return True - except NotFound: - return False - - -def test_cli_dataset_create(python_path): - """ - Test create dataset command - """ - out = _run_command( - f"{python_path} -m cli dataset create {DATASET_ID} --if_exists=replace", - output="out", - ) - - assert b"Datasets `pytest` and `pytest_staging` were created in BigQuery" in out - - -def test_cli_dataset_publicize(python_path): - """ - Test publicize dataset command - """ - - out = _run_command( - f"{python_path} -m cli dataset publicize {DATASET_ID}", output="out" - ) - - assert b"Dataset `pytest` became public!" in out - - -def test_cli_dataset_delete(python_path): - """ - Test delete dataset command - """ - - out = _run_command( - f"yes | {python_path} -m cli dataset delete {DATASET_ID}", output="out" - ) - - assert b"Datasets `pytest` and `pytest_staging` were deleted in BigQuery" in out - - -def test_cli_dataset_init(python_path): - """ - Test init dataset command - """ - - out = _run_command( - f"{python_path} -m cli dataset init {DATASET_ID} --replace", output="out" - ) - - assert b"Dataset `pytest` folder and metadata were created at" in out - - -@pytest.mark.skip(reason="require positional argument not explicited by help") -def test_cli_dataset_update(python_path): - """ - Test update dataset command - """ - - out = _run_command( - f"{python_path} -m cli dataset update {DATASET_ID}", output="out" - ) - - assert b"Dataset `pytest` was updated in BigQuery" in out - - def test_cli_download(testdir, python_path): """ Test download command @@ -196,105 +124,3 @@ def test_cli_list_datasets(python_path): err = _run_command(f"{python_path} -m cli list_datasets", output="err") assert b"Datasets" 
in err - - -def test_cli_metadata_create(default_metadata_path, python_path): - """ - Teste metadata create command - """ - - _ = _run_command( - f"{python_path} -m cli metadata create --if_exists=replace {DATASET_ID} {TABLE_ID}", - output="out", - ) - - assert (default_metadata_path / DATASET_ID / METADATA_FILES["dataset"]).exists() - assert ( - default_metadata_path / DATASET_ID / TABLE_ID / METADATA_FILES["table"] - ).exists() - - -def test_cli_metadata_isupdated(python_path): - """ - Teste metadata isupdated command - """ - - out = _run_command( - f"{python_path} -m cli metadata is_updated {DATASET_ID}", output="out" - ) - - assert b"Local metadata is updated." in out - - -@pytest.mark.skip(reason="require positional argument not explicited by help") -def test_cli_storage_upload(testdir, python_path): - """ - Teste storage upload command - """ - - _ = _run_command( - f"{python_path} -m cli storage upload {testdir / 'data.csv'} {DATASET_ID}.{TABLE_ID}", - output="out", - ) - - assert os.path.isfile(testdir / "data.csv") - - -@pytest.mark.skip(reason="require positional argument not explicited by help") -def test_cli_storage_download(testdir, python_path): - """ - Teste storage download command - """ - - _ = _run_command( - f"{python_path} -m cli storage download {testdir / 'data.csv'} {DATASET_ID}.{TABLE_ID}", - output="out", - ) - - assert os.path.isfile(testdir / "data.csv") - - -def test_cli_table_create(python_path, data_csv_path): - """ - Teste table create command - """ - - _ = _run_command( - f"""{python_path} -m cli table create --if_table_exists=replace \ - --if_table_config_exists=replace --if_storage_data_exists=replace \ - --path={data_csv_path} pytest pytest - """, - output="out", - ) - - table = bd.Table(DATASET_ID, TABLE_ID) - - assert _table_exists(table, mode="staging") - - -def test_cli_table_delete(python_path): - """ - Teste table delete command - """ - - _ = _run_command( - f"{python_path} -m cli table delete --mode=staging {DATASET_ID} 
{TABLE_ID}", - output="out", - ) - - table = bd.Table(DATASET_ID, TABLE_ID) - - assert not _table_exists(table, mode="staging") - - -def test_cli_create_no_path_error(testdir, python_path): - """ - Teste if error is raised when no path is provided - """ - - err = _run_command( - f"{python_path} -m cli --metadata_path={testdir} table create --if_table_exists=pass {DATASET_ID} {TABLE_ID}", - output="err", - ) - - assert b"You must provide a path to correctly create config files" in err diff --git a/python-package/tests/test_dataset.py b/python-package/tests/test_dataset.py index bce988712..3b64b6ee2 100644 --- a/python-package/tests/test_dataset.py +++ b/python-package/tests/test_dataset.py @@ -1,13 +1,14 @@ """ Tests for the Dataset class """ +import shutil + # pylint: disable=protected-access from pathlib import Path -import shutil +import google.api_core.exceptions as google_exceptions import pytest from google.cloud import bigquery -import google.api_core.exceptions as google_exceptions DATASET_ID = "pytest" diff --git a/python-package/tests/test_datatype.py b/python-package/tests/test_datatype.py index f9022561c..080218a81 100644 --- a/python-package/tests/test_datatype.py +++ b/python-package/tests/test_datatype.py @@ -7,9 +7,11 @@ ExternalConfig, HivePartitioningOptions, ) + import basedosdados as bd from basedosdados.upload.datatypes import Datatype + def test_header_avro(data_avro_path): """ Test if header is returned for avro format diff --git a/python-package/tests/test_download/test_download.py b/python-package/tests/test_download/test_download.py index c2dd56feb..a5fcb8737 100644 --- a/python-package/tests/test_download/test_download.py +++ b/python-package/tests/test_download/test_download.py @@ -2,25 +2,20 @@ Tests for the `download` class. 
""" -from pathlib import Path import shutil +from pathlib import Path -import pytest import pandas as pd +import pytest from pandas_gbq.gbq import GenericGBQException -from basedosdados import ( - download, - read_sql, - read_table, -) +from basedosdados import download, read_sql, read_table from basedosdados.exceptions import ( BaseDosDadosException, - BaseDosDadosNoBillingProjectIDException, BaseDosDadosInvalidProjectIDException, + BaseDosDadosNoBillingProjectIDException, ) - TEST_PROJECT_ID = "basedosdados-dev" SAVEFILE = Path(__file__).parent.parent / "tmp_bases" / "test.csv" SAVEPATH = Path(__file__).parent.parent / "tmp_bases" diff --git a/python-package/tests/test_download/test_metadata.py b/python-package/tests/test_download/test_metadata.py index d4379329a..f114a2405 100644 --- a/python-package/tests/test_download/test_metadata.py +++ b/python-package/tests/test_download/test_metadata.py @@ -1,22 +1,21 @@ -''' +""" Tests for metadata download fuunctions -''' -from pathlib import Path +""" import shutil +from pathlib import Path -import pytest import pandas as pd +import pytest from basedosdados import ( - list_datasets, - list_dataset_tables, get_dataset_description, - get_table_description, get_table_columns, + get_table_description, get_table_size, + list_dataset_tables, + list_datasets, search, ) - from basedosdados.download.metadata import _safe_fetch TEST_PROJECT_ID = "basedosdados-dev" @@ -26,9 +25,9 @@ def test_list_datasets_simple_verbose(capsys): - ''' + """ Test if list_datasets function works with verbose=True - ''' + """ out = list_datasets(with_description=False, verbose=True) out, _ = capsys.readouterr() # Capture prints @@ -36,9 +35,9 @@ def test_list_datasets_simple_verbose(capsys): def test_list_datasets_simple_list(): - ''' + """ Test if list_datasets function works with verbose=False - ''' + """ out = list_datasets(with_description=False, verbose=False) # check if function returns list @@ -48,9 +47,9 @@ def 
test_list_datasets_simple_list(): def test_list_datasets_complete_list(): - ''' + """ Test if list_datasets function works with verbose=False and with_description=True - ''' + """ out = list_datasets(with_description=True, verbose=False) # check if function returns list @@ -60,9 +59,9 @@ def test_list_datasets_complete_list(): def test_list_datasets_complete_verbose(capsys): - ''' + """ Test list_datasets with complete output - ''' + """ list_datasets(with_description=True, verbose=True) out, _ = capsys.readouterr() # Capture prints @@ -71,9 +70,9 @@ def test_list_datasets_complete_verbose(capsys): def test_list_dataset_tables_simple_verbose(capsys): - ''' + """ Test list_dataset_tables function with verbose=True and with_description=False - ''' + """ list_dataset_tables(dataset_id="br_me_caged", with_description=False, verbose=True) out, _ = capsys.readouterr() # Capture prints @@ -81,9 +80,9 @@ def test_list_dataset_tables_simple_verbose(capsys): def test_list_dataset_tables_simple_list(): - ''' + """ Test list_dataset_tables function with verbose=False and with_description=False - ''' + """ out = list_dataset_tables( dataset_id="br_me_caged", with_description=False, verbose=False @@ -94,9 +93,9 @@ def test_list_dataset_tables_simple_list(): def test_list_dataset_tables_complete_verbose(capsys): - ''' + """ Test list_dataset_tables function with verbose=True and with_description=True - ''' + """ list_dataset_tables(dataset_id="br_me_caged", with_description=True, verbose=True) @@ -106,9 +105,9 @@ def test_list_dataset_tables_complete_verbose(capsys): def test_list_dataset_tables_complete_list(): - ''' + """ Test list_dataset_tables function with verbose=False and with_description=True - ''' + """ out = list_dataset_tables( dataset_id="br_me_caged", with_description=True, verbose=False @@ -119,9 +118,9 @@ def test_list_dataset_tables_complete_list(): def test_get_dataset_description(capsys): - ''' + """ Test get_dataset_description function with verbose=False - 
''' + """ get_dataset_description("br_me_caged", verbose=True) out, _ = capsys.readouterr() # Capture prints @@ -129,27 +128,27 @@ def test_get_dataset_description(capsys): def test_get_dataset_description_verbose_false(): - ''' + """ Test get_dataset_description function with verbose=False - ''' + """ out = get_dataset_description("br_me_caged", verbose=False) assert isinstance(out, str) assert len(out) > 0 def test_get_table_description(capsys): - ''' + """ Test get_table_description function with verbose=False - ''' + """ get_table_description("br_me_caged", "microdados_antigos") out, _ = capsys.readouterr() # Capture prints assert len(out) > 0 def test_get_table_description_verbose_false(): - ''' + """ Test get_table_description function with verbose=False - ''' + """ out = get_table_description( dataset_id="br_me_caged", table_id="microdados_antigos", @@ -160,9 +159,9 @@ def test_get_table_description_verbose_false(): def test_get_table_columns(capsys): - ''' + """ Test get_table_columns function with verbose=False - ''' + """ get_table_columns( dataset_id="br_me_caged", table_id="microdados_antigos", @@ -173,9 +172,9 @@ def test_get_table_columns(capsys): def test_get_table_columns_verbose_false(): - ''' + """ Test get_table_columns function with verbose=False - ''' + """ out = get_table_columns( dataset_id="br_me_caged", table_id="microdados_antigos", @@ -186,9 +185,9 @@ def test_get_table_columns_verbose_false(): def test_search(): - ''' + """ Test search function with verbose=False - ''' + """ out = search(query="agua", order_by="score") # check if function returns pd.DataFrame assert isinstance(out, pd.DataFrame) @@ -200,9 +199,9 @@ def test_search(): def test_get_table_size(capsys): - ''' + """ Test get_table_size function with verbose=False - ''' + """ get_table_size( dataset_id="br_ibge_censo_demografico", table_id="setor_censitario_basico_2010", @@ -212,9 +211,9 @@ def test_get_table_size(capsys): def test__safe_fetch(capsys): - ''' + """ Test 
_safe_fetch function with verbose=False - ''' + """ _safe_fetch("https://www.lkajsdhgfal.com.br") out, _ = capsys.readouterr() # Capture prints assert "HTTPSConnection" in out diff --git a/python-package/tests/test_metadata.py b/python-package/tests/test_metadata.py index f33579926..1187e245a 100644 --- a/python-package/tests/test_metadata.py +++ b/python-package/tests/test_metadata.py @@ -1,20 +1,22 @@ """ Test for the Metadata class """ -# pylint: disable=fixme -from pathlib import Path import random import shutil import string +# pylint: disable=fixme +from pathlib import Path + import pytest import ruamel.yaml as ryaml -from basedosdados import Metadata +from basedosdados import Metadata # TODO: deprecate from basedosdados.exceptions import BaseDosDadosException METADATA_FILES = {"dataset": "dataset_config.yaml", "table": "table_config.yaml"} + def test_create_from_dataset_id(dataset_metadata, dataset_metadata_path): """ Test metadata creation from a dataset id. diff --git a/python-package/tests/test_metadata_api/test_metadata_api.py b/python-package/tests/test_metadata_api/test_metadata_api.py new file mode 100644 index 000000000..06305a54f --- /dev/null +++ b/python-package/tests/test_metadata_api/test_metadata_api.py @@ -0,0 +1,195 @@ +""" +Tests for the Metadata class with new API +""" +# pylint: disable=fixme, unused-import, protected-access, trailing-whitespace, too-many-branches +import pytest + +from basedosdados import Metadata # TODO: deprecate +from basedosdados.exceptions import BaseDosDadosException + +METADATA_FILES = {"dataset": "dataset_config.yaml", "table": "table_config.yaml"} + + +def test_dataset_table_schema(api_table_metadata): + """ + Test table schema. + """ + + metadata = api_table_metadata.api_data_dict + + assert metadata.get("id") == api_table_metadata.dataset_uuid + assert metadata.get("tables")[0].get("id") == api_table_metadata.table_uuid + + +def test_dataset_schema(api_dataset_metadata): + """ + Test dataset schema. 
+ """ + + metadata = api_dataset_metadata.api_data_dict + + assert metadata.get("id") == api_dataset_metadata.dataset_uuid + assert "themes" in metadata + + +def test_dataset_does_not_exists(api_new_dataset_metadata): + """ + Test if dataset exists in API. + """ + assert api_new_dataset_metadata.exists() is False + + +def test_dataset_exists(api_dataset_metadata): + """ + Test if dataset does not exists in API. + """ + assert api_dataset_metadata.exists() is True + + +def test_dataset_table_does_not_exists(api_new_dataset_metadata): + """ + Test if table does not exists in API. + """ + assert api_new_dataset_metadata.exists() is False + + +def test_dataset_table_exists(api_table_metadata): + """ + Test if table exists in API. + """ + assert api_table_metadata.exists() is True + + +@pytest.mark.skip(reason="This test needs to mock an invalid dataset.") +def test_dataset_is_not_updated(api_dataset_metadata): + """ + Test if dataset is updated. + TODO: Create a deterministic test to return True. + """ + + assert api_dataset_metadata.is_updated() is False + + +def test_columns_schema(api_table_metadata): + """ + Test columns schema. + """ + columns_schema = api_table_metadata.columns_schema + assert "directory_column" in columns_schema.get("properties") + + +def test_table_metadata_schema(api_table_metadata): + """ + Test metadata schema. + """ + metadata_schema = api_table_metadata.metadata_schema + resource_type = ( + metadata_schema.get("properties").get("resource_type").get("enum")[0] + ) + assert resource_type == "bdm_table" + + +def test_dataset_metadata_schema(api_dataset_metadata): + """ + Test metadata schema. + """ + metadata_schema = api_dataset_metadata.metadata_schema + resource_type = metadata_schema.get("properties").get("type").get("enum")[0] + assert resource_type == "dataset" + + +def test_create_new_table(api_new_table_metadata): + """ + Test if table is created. To be reproducible, + the test first deletes the table if it exists. 
+ """ + if api_new_table_metadata.filepath.exists(): + api_new_table_metadata.filepath.unlink() + + res = api_new_table_metadata.create( + if_exists="replace", table_only=False, columns=["ano", "sigla_uf", "dados"] + ) + + assert api_new_table_metadata.filepath.exists() is True + assert isinstance(res, Metadata) + + +def test_update_dataset(api_dataset_metadata): + """ + Test if dataset is updated. + """ + res = api_dataset_metadata.create(if_exists="replace") + assert isinstance(res, Metadata) + + +def test_update_table(api_table_metadata): + """ + Test if table is updated. + """ + res = api_table_metadata.create(if_exists="replace") + assert isinstance(res, Metadata) + + +def test_simplify_graphql_empty_query(api_table_metadata): + """ + Test if empty query is returned. + """ + query = """ + """ + variables = {} + cleaned_res = api_table_metadata._get_graphql(query, variables) + assert cleaned_res == {} + + +def test_owner_org_table_exists(api_table_metadata): + """ + Test if owner_org is returned. + """ + owner_org = api_table_metadata.owner_org + assert owner_org == "c0b18195-ee44-464a-8b32-dfdfd9473c4d" + + +def test_owner_org_dataset_exists(api_dataset_metadata): + """ + Test if owner_org is returned. + """ + owner_org = api_dataset_metadata.owner_org + assert owner_org == "c0b18195-ee44-464a-8b32-dfdfd9473c4d" + + +def test_owner_org_new_table(api_new_table_metadata): + """ + Test if no owner_org is returned. + """ + + with pytest.raises(BaseDosDadosException): + _ = api_new_table_metadata.owner_org # pylint: disable=unused-variable + + +def test_simplify_graphql_response(api_table_metadata): + """ + Test if edges and nodes are removed from graphql response. 
+ """ + query_res = { + "allDataset": { + "edges": [ + { + "node": { + "_id": "c0b18195-ee44-464a-8b32-dfdfd9473c4d", + "name": "br_ibge_pib", + "title": "Produto Interno Bruto", + "description": "Produto Interno Bruto", + "themes": { + "edges": [ + {"node": {"slug": "economia", "name": "Economia"}} + ] + }, + } + } + ] + } + } + + cleaned_res = api_table_metadata._simplify_graphql_response(query_res) + assert cleaned_res["allDataset"][0]["_id"] == "c0b18195-ee44-464a-8b32-dfdfd9473c4d" + assert isinstance(cleaned_res["allDataset"][0]["themes"], list) diff --git a/python-package/tests/test_metadata_api/test_publish.py b/python-package/tests/test_metadata_api/test_publish.py new file mode 100644 index 000000000..300ded878 --- /dev/null +++ b/python-package/tests/test_metadata_api/test_publish.py @@ -0,0 +1,168 @@ +""" +Test for publishing with new API. +""" +import os + +# pylint: disable=fixme, unused-import, protected-access +from pprint import pprint + +import pytest + +from basedosdados.exceptions import BaseDosDadosException +from basedosdados.upload.remoteapi import RemoteAPI + +METADATA_FILES = {"dataset": "dataset_config.yaml", "table": "table_config.yaml"} + + +@pytest.mark.skip(reason="This test needs to mock an invalid token.") +def test_user_not_authorized(api_new_dataset_metadata): + """ + Test if user is not authorized. + """ + + with pytest.raises(BaseDosDadosException): + api_new_dataset_metadata.publish() + + +def make_login(obj): + """ + Logs the user in. Password must be set as API_PASSWORD envvar. + """ + print("\nTrying to login...") + token = obj.load_token() + try: + if not obj.verify_token(token): + raise BaseDosDadosException("Token is invalid.") + except BaseDosDadosException: + print("Token is invalid. 
Trying to refresh...") + try: + obj.refresh_token(token) + except BaseDosDadosException: + password = os.getenv("API_PASSWORD") + if not password: + raise ValueError("API_PASSWORD not found in environment variables.") + new_token = obj.get_token("mauricio", password) + obj.save_token(new_token) + + +def test_publish_existent_metadata_raise(api_dataset_metadata): + """ + Test if publishing an existent metadata raises an error. + """ + make_login(api_dataset_metadata) + with pytest.raises(Exception): + api_dataset_metadata.publish(if_exists="raise") + + +def test_publish_existent_metadata_pass(api_dataset_metadata): + """ + Test if publishing an existent metadata passes. + """ + make_login(api_dataset_metadata) + res = api_dataset_metadata.publish(if_exists="pass") + assert isinstance(res, dict) + assert len(res) == 0 + + +def test_publish_new_dataset(api_new_dataset_metadata): + """ + Test if api_data_dict is a dict. + """ + make_login(api_new_dataset_metadata) + response = api_new_dataset_metadata.publish() + print(response) + assert response["result"] == "not implemented yet" + + +def test_publish_existent_dataset_metadata_replace(api_dataset_metadata): + """ + Test if publishing an existent metadata replaces data in API. + """ + make_login(api_dataset_metadata) + response = api_dataset_metadata.publish(if_exists="replace", update_locally=True) + print(response) + assert response["result"] == "not implemented yet" + + +def test_publish_existent_table_metadata_replace(api_table_metadata): + """ + Test if publishing an existent metadata replaces data in API. + """ + make_login(api_table_metadata) + if not api_table_metadata.is_updated(): + api_table_metadata.create(if_exists="replace") + response = api_table_metadata.publish( + if_exists="replace", all=True, update_locally=True + ) + assert response["result"] == "not implemented yet" + + +def test_api_data_dict(api_dataset_metadata): + """ + Test if api_data_dict is a dict. 
+ """ + dataset_dict = api_dataset_metadata.api_data_dict + pprint(dataset_dict) + assert isinstance(dataset_dict, dict) + + +def test_api_prepare_fields(api_dataset_metadata): + """ + Test if api_data_dict returns correct fields. + """ + make_login(api_dataset_metadata) + remote_api = RemoteAPI( + api_dataset_metadata.api_graphql, api_dataset_metadata.load_token() + ) + fields = remote_api._prepare_fields( + {"id": api_dataset_metadata.dataset_uuid, "name": "Teste do nome"}, + model="dataset", + ) + pprint(fields) + assert "id" in fields + + +def test_api_update_dataset(api_dataset_metadata): + """ + Test if api_data_dict is a dict. + """ + remote_api = RemoteAPI( + api_dataset_metadata.api_graphql, api_dataset_metadata.load_token() + ) + response = remote_api.call_action( + "update_dataset", api_dataset_metadata.api_data_dict + ) + pprint(response) + assert response["result"] == "not implemented yet" + + +def test_api_create_dataset(api_new_dataset_metadata): + """ + Test if api_data_dict is a dict. + """ + remote_api = RemoteAPI( + api_new_dataset_metadata.api_graphql, api_new_dataset_metadata.load_token() + ) + response = remote_api.call_action( + "create_dataset", api_new_dataset_metadata.api_data_dict + ) + pprint(response) + assert response["result"] == "not implemented yet" + + +def test_api_create_table(api_ipea_table_metadata): + """ + Test creating table with dataset. 
+ Args: + api_ipea_table_metadata (Metadata): Metadata object (copied from ineb) + Returns: + None + """ + make_login(api_ipea_table_metadata) + response = api_ipea_table_metadata.publish( + all=True, + if_exists="replace", + update_locally=False, + ) + pprint(response) + assert response["result"] == "not implemented yet" diff --git a/python-package/tests/test_storage.py b/python-package/tests/test_storage.py index cdf7748c0..b335f6f5c 100644 --- a/python-package/tests/test_storage.py +++ b/python-package/tests/test_storage.py @@ -2,15 +2,15 @@ Tests for the Storage class """ -from pathlib import Path import os +from pathlib import Path import pytest from google.api_core.exceptions import NotFound from google.cloud import storage as storage_gcloud -from basedosdados import Storage -import basedosdados as bd +import basedosdados as bd +from basedosdados import Storage DATASET_ID = "pytest" TABLE_ID = "pytest" @@ -92,12 +92,12 @@ def test_download_partitions(storage): assert ( Path(SAVEPATH) - / "staging" - / DATASET_ID - / TABLE_ID - / "key1=value1" - / "key2=value1" - / "municipio.csv" + / "staging" # noqa + / DATASET_ID # noqa + / TABLE_ID # noqa + / "key1=value1" # noqa + / "key2=value1" # noqa + / "municipio.csv" # noqa ).is_file() storage.download( @@ -108,12 +108,12 @@ def test_download_partitions(storage): assert ( Path(SAVEPATH) - / "staging" - / DATASET_ID - / TABLE_ID - / "key1=value1" - / "key2=value2" - / "municipio.csv" + / "staging" # noqa + / DATASET_ID # noqa + / TABLE_ID # noqa + / "key1=value1" # noqa + / "key2=value2" # noqa + / "municipio.csv" # noqa ).is_file() @@ -185,7 +185,9 @@ def test_change_path_credentials(storage, sample_data): os.system(f"mkdir {home}/.testcredentials") os.system(f"mv -r {home}/.basedosdados/* .testcredentials") - os.system("sed -i 's/\/.basedosdados\//\/.testcredentials\//g' config.toml") # pylint: disable=W1401 + os.system( + "sed -i 's/\/.basedosdados\//\/.testcredentials\//g' config.toml" # noqa + ) # pylint: 
disable=W1401 bd.config.project_config_path = f"{home}/.testcredentials" @@ -199,7 +201,7 @@ def test_change_path_credentials(storage, sample_data): files = [blob.name for blob in client.list_blobs("basedosdados-dev-backup")] # delete file from new bucket - file =f'staging/{DATASET_ID}/{TABLE_ID}/municipio.csv' + file = f"staging/{DATASET_ID}/{TABLE_ID}/municipio.csv" bucket = client.get_bucket("basedosdados-dev-backup") blob = bucket.blob(file) blob.delete() @@ -207,7 +209,9 @@ def test_change_path_credentials(storage, sample_data): # move again .basedosdados folder os.system(f"mv -r {home}/.testcredentials/* .basedosdados") # replace path in config.toml - os.system("sed -i 's/\/.testcredentials\//\/.basedosdados\//g' config.toml") # pylint: disable=W1401 + os.system( + "sed -i 's/\/.testcredentials\//\/.basedosdados\//g' config.toml" # noqa + ) # pylint: disable=W1401 os.system(f"rm -r {home}/.testcredentials") diff --git a/python-package/tests/test_table.py b/python-package/tests/test_table.py index 3b8b99715..754e415a4 100644 --- a/python-package/tests/test_table.py +++ b/python-package/tests/test_table.py @@ -1,16 +1,17 @@ """ Tests for the Table class """ +import shutil + # pylint: disable=invalid-name from pathlib import Path -import shutil -import basedosdados as bd -from basedosdados import Dataset, Table, Storage -from basedosdados.exceptions import BaseDosDadosException +import pytest from google.api_core.exceptions import NotFound -import pytest +import basedosdados as bd +from basedosdados import Dataset, Storage, Table +from basedosdados.exceptions import BaseDosDadosException DATASET_ID = "pytest" TABLE_ID = "pytest" @@ -473,19 +474,26 @@ def test_create_if_storage_data_raise(table, testdir, data_csv_path): if_storage_data_exists="raise", ) + def test_create_if_force_columns_true(testdir): """ Test create when if_force_columns is True """ - dataset_id='br_cvm_administradores_carteira' - table_id='pessoa_fisica' + dataset_id = 
"br_cvm_administradores_carteira" + table_id = "pessoa_fisica" tb = bd.Table(dataset_id=dataset_id, table_id=table_id, metadata_path=testdir) - client =Storage(dataset_id=dataset_id, table_id=table_id) - client.download('bd_pessoa_fisica.csv', testdir, mode='staging') + client = Storage(dataset_id=dataset_id, table_id=table_id) + client.download("bd_pessoa_fisica.csv", testdir, mode="staging") - filepath = testdir / 'staging' / 'br_cvm_administradores_carteira' / 'pessoa_fisica' / 'bd_pessoa_fisica.csv' + filepath = ( + testdir + / "staging" # noqa + / "br_cvm_administradores_carteira" # noqa + / "pessoa_fisica" # noqa + / "bd_pessoa_fisica.csv" # noqa + ) tb.create( filepath, @@ -495,22 +503,28 @@ def test_create_if_force_columns_true(testdir): force_columns=True, ) - assert tb.table_config['columns'][0]['description'] is None + assert tb.table_config["columns"][0]["description"] is None def test_create_if_force_columns_false(testdir): """ Test create when if_force_columns is True """ - dataset_id='br_cvm_administradores_carteira' - table_id='pessoa_fisica' + dataset_id = "br_cvm_administradores_carteira" + table_id = "pessoa_fisica" tb = bd.Table(dataset_id=dataset_id, table_id=table_id, metadata_path=testdir) - client =Storage(dataset_id=dataset_id, table_id=table_id) - client.download('bd_pessoa_fisica.csv', testdir, mode='staging') + client = Storage(dataset_id=dataset_id, table_id=table_id) + client.download("bd_pessoa_fisica.csv", testdir, mode="staging") - filepath = testdir / 'staging' / 'br_cvm_administradores_carteira' / 'pessoa_fisica' / 'bd_pessoa_fisica.csv' + filepath = ( + testdir + / "staging" # noqa + / "br_cvm_administradores_carteira" # noqa + / "pessoa_fisica" # noqa + / "bd_pessoa_fisica.csv" # noqa + ) tb.create( filepath, @@ -520,7 +534,7 @@ def test_create_if_force_columns_false(testdir): force_columns=False, ) - assert tb.table_config['columns'][0]['description']=='Nome' + assert tb.table_config["columns"][0]["description"] == "Nome" def 
test_create_auto_partitions(testdir, data_csv_path, sample_data): @@ -613,7 +627,7 @@ def test_update(table, data_csv_path): assert table_exists(table, "staging") - ### Como dar assert que a descrição foi atualizada? + # Como dar assert que a descrição foi atualizada? table.update(mode="all") diff --git a/python-package/tests/test_utils.py b/python-package/tests/test_utils.py new file mode 100644 index 000000000..bc173148f --- /dev/null +++ b/python-package/tests/test_utils.py @@ -0,0 +1,96 @@ +""" +Tests for the upload utilities. +""" +import os + +# pylint: disable=invalid-name +import shutil +from glob import glob + +import pandas as pd + +from basedosdados.upload.utils import break_file, to_partitions, update_columns + +DATASET_ID = "pytest" +TABLE_ID = "pytest" + +TABLE_FILES = ["publish.sql", "table_config.yaml"] + + +def test_update_columns(sample_data, testdir, table): + """ + Test the update_columns utility. + """ + # table = bd.Table(dataset_id=DATASET_ID, table_id=TABLE_ID, metadata_path=testdir) + table.create( + sample_data, + if_table_exists="replace", + if_table_config_exists="replace", + if_storage_data_exists="replace", + ) + + table_config_path = testdir / DATASET_ID / TABLE_ID / "table_config.yaml" + last_modified = table_config_path.stat().st_mtime + + publish_path = testdir / DATASET_ID / TABLE_ID / "publish.sql" + last_modified_publish = publish_path.stat().st_mtime + + path_arq = sample_data / "arquitetura_municipio.xlsx" + # get path as string + path_arq = str(path_arq) + + update_columns(table_obj=table, columns_config_url_or_path=path_arq) + + assert table_config_path.stat().st_mtime > last_modified + assert publish_path.stat().st_mtime > last_modified_publish + + +def test_to_partitions(sample_data, testdir): + """ + Test the to_partitions utility. 
+ """ + os.makedirs(testdir / "municipio_partitioned", exist_ok=True) + + df = pd.read_csv(sample_data / "municipio.csv") + to_partitions(df, ["ano"], testdir / "municipio_partitioned") + + # assert if the files were created from 2002 to 2011 + for i in range(2002, 2012): + assert os.path.exists( + testdir / "municipio_partitioned" / f"ano={i}" / "data.csv" + ) + + +def test_break_file(sample_data, testdir): + """ + Test the break_file utility. + """ + os.makedirs(testdir / "municipio_files", exist_ok=True) + # copy municipio.csv to municipio_files + shutil.copy(sample_data / "municipio.csv", testdir / "municipio_files") + # get path as string + path = str(testdir / "municipio_files" / "municipio.csv") + break_file( + filepath=path, + columns=[ + "ano", + "id_municipio", + "pib", + "impostos_liquidos", + "va", + "va_agropecuaria", + ], + chunksize=100, + ) + + output_path = str(testdir / "municipio_files" / "municipio") + + files = glob(output_path + "*.csv") + + # check if the summed rows number of the files is equal to the original file + assert sum([len(pd.read_csv(file)) for file in files]) == len( + pd.read_csv(sample_data / "municipio.csv") + ) + + # remove municipio folder + shutil.rmtree(sample_data / "municipio")