Skip to content

Commit

Permalink
Adição do dataset world_wwf_hydrosheds
Browse files Browse the repository at this point in the history
  • Loading branch information
Winzen committed Jan 12, 2024
1 parent 04326cd commit aae87dd
Show file tree
Hide file tree
Showing 6 changed files with 3,477 additions and 0 deletions.
3 changes: 3 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,9 @@ models:
world_oceanos_mapeamento:
+materialized: table
+schema: world_oceanos_mapeamento
world_wwf_hydrosheds:
+materialized: table
+schema: world_wwf_hydrosheds
test_dataset:
+materialized: table
+schema: test_dataset
193 changes: 193 additions & 0 deletions models/world_wwf_hydrosheds/code/hydrosheds_atlas.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wpHkRnNHJJdE"
},
"outputs": [],
"source": [
"https://www.hydrosheds.org/products"
]
},
{
"cell_type": "code",
"source": [
"pip install pyogrio"
],
"metadata": {
"id": "5b33Qz9CJSVX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from ast import main\n",
"import requests\n",
"from zipfile import ZipFile\n",
"from pyogrio import read_dataframe\n",
"import pandas as pd\n",
"import geopandas as gpd\n",
"import concurrent.futures\n",
"import os\n",
"import re\n",
"\n",
"def download_url(url, save_path, chunk_size=128):\n",
" r = requests.get(url, stream=True)\n",
" with open(save_path, 'wb') as fd:\n",
" for chunk in r.iter_content(chunk_size=chunk_size):\n",
" fd.write(chunk)\n",
"\n",
"def extrair(path, path_output):\n",
" # loading the temp.zip and creating a zip object\n",
" with ZipFile(path, 'r') as zObject:\n",
"\n",
" # Extracting all the members of the zip\n",
" # into a specific location.\n",
" zObject.extractall(\n",
" path=path_output)\n",
"\n",
"def threads_tasks(funcion, list1, list2):\n",
" with concurrent.futures.ThreadPoolExecutor() as executor:\n",
" future = executor.map(funcion, list1, list2)\n",
"\n",
"def tables(zip_names: list,\n",
" links: list,\n",
" skip: bool=False,\n",
" flagment: bool=False,\n",
" re_pattern: str=\"\",\n",
" partition: str=\"\"):\n",
"\n",
" if not skip:\n",
"\n",
" threads_tasks(download_url, links, zip_names)\n",
" output_folders = [folder.replace(\".zip\", \"\") + \"/\" for folder in zip_names]\n",
" threads_tasks(extrair, zip_names, output_folders)\n",
" folder_zip = zip_name[0].replace(\".zip\", \"\")\n",
" folders_snp = [f\"{folder_zip}/\" + folder for folder in os.listdir(folder_zip) if folder.count(\"_shp\")]\n",
" files_snp = [folders_snp[0] + \"/\" + folder for folder in os.listdir(folders_snp[0]) if folder.count(\".shp\")]\n",
"\n",
" if flagment:\n",
" for file in files_snp:\n",
"\n",
" df_gio_fragment = read_dataframe(f'/content/' + file)\n",
" df_gio_fragment.columns = df_gio_fragment.columns.str.lower()\n",
"\n",
" region_name = re.findall(re_pattern, file)[0]\n",
" folder_name = f\"output/{partition}=\" + region_name\n",
" create_folder([folder_name])\n",
"\n",
" df_gio_fragment.to_csv(folder_name + f\"/{region_name}.csv\", index=False)\n",
"\n",
" else:\n",
"\n",
" dfs = pd.concat([read_dataframe(f'/content/' + file) for file in files_snp[:1]])\n",
" df_gio = gpd.GeoDataFrame(dfs)\n",
" return df_gio\n",
"\n",
"def create_folder(name_folders: list) -> None:\n",
"\n",
" for folder in name_folders:\n",
" try:\n",
" os.mkdir(folder)\n",
" except:\n",
" pass\n"
],
"metadata": {
"id": "TtxthjyEJVD-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# RiverAtlas"
],
"metadata": {
"id": "Ap__vD7LJ-We"
}
},
{
"cell_type": "code",
"source": [
"link = [\"https://figshare.com/ndownloader/files/20087486\"]\n",
"zip_name = [\"Global_RiverATLAS.zip\"]\n",
"create_folder([\"output\", \"input\"])\n",
"RiverATLASTESTE = tables(zip_name, link, False, True)"
],
"metadata": {
"id": "EP5KIxJbJxX2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"#BasinAtlas"
],
"metadata": {
"id": "BTFtQtjBKC7e"
}
},
{
"cell_type": "code",
"source": [
"link = [\"https://figshare.com/ndownloader/files/20087237\"]\n",
"zip_name = [\"Global_BasinATLAS.zip\"]\n",
"create_folder([\"output\", \"input\"])\n",
"BasinATLASTESTE = tables(zip_name, link, False, True,\n",
" re_pattern=\"lev(\\d*).shp\",\n",
" partition=\"level\")"
],
"metadata": {
"id": "-ay26hSvJzRG"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# LakeAtlas"
],
"metadata": {
"id": "IgD_tMfIKFem"
}
},
{
"cell_type": "code",
"source": [
"link = [\"https://figshare.com/ndownloader/files/35959547\"]\n",
"zip_name = [\"Global_LakeATLAS.zip\"]\n",
"create_folder([\"output\", \"input\"])\n",
"LakeATLASTESTE = tables(zip_name, link, False, True,\n",
" re_pattern=\"_([a-z]{3}_[a-z]{4}).shp\",\n",
" partition=\"region\")"
],
"metadata": {
"id": "aN89ppSBJ06_"
},
"execution_count": null,
"outputs": []
}
]
}
Loading

0 comments on commit aae87dd

Please sign in to comment.