Commit: Addition of the world_wwf_hydrosheds dataset
Showing 6 changed files with 3,477 additions and 0 deletions.
models/world_wwf_hydrosheds/code/hydrosheds_atlas.ipynb (193 additions, 0 deletions)
@@ -0,0 +1,193 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "wpHkRnNHJJdE"
},
"outputs": [],
"source": [
"https://www.hydrosheds.org/products" | ||
]
},
{
"cell_type": "code",
"source": [
"pip install pyogrio" | ||
],
"metadata": {
"id": "5b33Qz9CJSVX"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"from ast import main\n", | ||
"import requests\n", | ||
"from zipfile import ZipFile\n", | ||
"from pyogrio import read_dataframe\n", | ||
"import pandas as pd\n", | ||
"import geopandas as gpd\n", | ||
"import concurrent.futures\n", | ||
"import os\n", | ||
"import re\n", | ||
"\n", | ||
"def download_url(url, save_path, chunk_size=128):\n", | ||
" r = requests.get(url, stream=True)\n", | ||
" with open(save_path, 'wb') as fd:\n", | ||
" for chunk in r.iter_content(chunk_size=chunk_size):\n", | ||
" fd.write(chunk)\n", | ||
"\n", | ||
"def extrair(path, path_output):\n", | ||
" # loading the temp.zip and creating a zip object\n", | ||
" with ZipFile(path, 'r') as zObject:\n", | ||
"\n", | ||
" # Extracting all the members of the zip\n", | ||
" # into a specific location.\n", | ||
" zObject.extractall(\n", | ||
" path=path_output)\n", | ||
"\n", | ||
"def threads_tasks(funcion, list1, list2):\n", | ||
" with concurrent.futures.ThreadPoolExecutor() as executor:\n", | ||
" future = executor.map(funcion, list1, list2)\n", | ||
"\n", | ||
"def tables(zip_names: list,\n", | ||
" links: list,\n", | ||
" skip: bool=False,\n", | ||
" flagment: bool=False,\n", | ||
" re_pattern: str=\"\",\n", | ||
" partition: str=\"\"):\n", | ||
"\n", | ||
" if not skip:\n", | ||
"\n", | ||
" threads_tasks(download_url, links, zip_names)\n", | ||
" output_folders = [folder.replace(\".zip\", \"\") + \"/\" for folder in zip_names]\n", | ||
" threads_tasks(extrair, zip_names, output_folders)\n", | ||
" folder_zip = zip_name[0].replace(\".zip\", \"\")\n", | ||
" folders_snp = [f\"{folder_zip}/\" + folder for folder in os.listdir(folder_zip) if folder.count(\"_shp\")]\n", | ||
" files_snp = [folders_snp[0] + \"/\" + folder for folder in os.listdir(folders_snp[0]) if folder.count(\".shp\")]\n", | ||
"\n", | ||
" if flagment:\n", | ||
" for file in files_snp:\n", | ||
"\n", | ||
" df_gio_fragment = read_dataframe(f'/content/' + file)\n", | ||
" df_gio_fragment.columns = df_gio_fragment.columns.str.lower()\n", | ||
"\n", | ||
" region_name = re.findall(re_pattern, file)[0]\n", | ||
" folder_name = f\"output/{partition}=\" + region_name\n", | ||
" create_folder([folder_name])\n", | ||
"\n", | ||
" df_gio_fragment.to_csv(folder_name + f\"/{region_name}.csv\", index=False)\n", | ||
"\n", | ||
" else:\n", | ||
"\n", | ||
" dfs = pd.concat([read_dataframe(f'/content/' + file) for file in files_snp[:1]])\n", | ||
" df_gio = gpd.GeoDataFrame(dfs)\n", | ||
" return df_gio\n", | ||
"\n", | ||
"def create_folder(name_folders: list) -> None:\n", | ||
"\n", | ||
" for folder in name_folders:\n", | ||
" try:\n", | ||
" os.mkdir(folder)\n", | ||
" except:\n", | ||
" pass\n" | ||
],
"metadata": {
"id": "TtxthjyEJVD-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# RiverAtlas"
],
"metadata": {
"id": "Ap__vD7LJ-We"
}
},
{
"cell_type": "code",
"source": [
"link = [\"https://figshare.com/ndownloader/files/20087486\"]\n",
"zip_name = [\"Global_RiverATLAS.zip\"]\n",
"create_folder([\"output\", \"input\"])\n",
"RiverATLASTESTE = tables(zip_name, link, False, True)"
],
"metadata": {
"id": "EP5KIxJbJxX2"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"#BasinAtlas" | ||
],
"metadata": {
"id": "BTFtQtjBKC7e"
}
},
{
"cell_type": "code",
"source": [
"link = [\"https://figshare.com/ndownloader/files/20087237\"]\n",
"zip_name = [\"Global_BasinATLAS.zip\"]\n",
"create_folder([\"output\", \"input\"])\n",
"BasinATLASTESTE = tables(zip_name, link, False, True,\n",
" re_pattern=\"lev(\\d*).shp\",\n", | ||
" partition=\"level\")" | ||
], | ||
"metadata": { | ||
"id": "-ay26hSvJzRG" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# LakeAtlas" | ||
], | ||
"metadata": { | ||
"id": "IgD_tMfIKFem" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"link = [\"https://figshare.com/ndownloader/files/35959547\"]\n", | ||
"zip_name = [\"Global_LakeATLAS.zip\"]\n", | ||
"create_folder([\"output\", \"input\"])\n", | ||
"LakeATLASTESTE = tables(zip_name, link, False, True,\n", | ||
" re_pattern=\"_([a-z]{3}_[a-z]{4}).shp\",\n", | ||
" partition=\"region\")" | ||
], | ||
"metadata": { | ||
"id": "aN89ppSBJ06_" | ||
}, | ||
"execution_count": null, | ||
"outputs": [] | ||
} | ||
] | ||
} |
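
For reference, the download / extract / partition flow the notebook implements can be run outside Colab as a short standalone script. This is a minimal sketch only: the figshare URL and archive name are the BasinATLAS ones from the cells above, while the helper names, directory layout, and chunk size are illustrative assumptions, not part of this commit.

import os
import re
from zipfile import ZipFile

import geopandas as gpd
import requests

# BasinATLAS inputs taken from the notebook cell above
BASIN_URL = "https://figshare.com/ndownloader/files/20087237"
ZIP_PATH = "Global_BasinATLAS.zip"
EXTRACT_DIR = "Global_BasinATLAS"

def download(url, save_path, chunk_size=1 << 20):
    # Stream the archive to disk so a multi-GB download never sits in memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(save_path, "wb") as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

def partition_levels(extract_dir, out_dir="output"):
    # Write one Hive-style partition (level=<NN>) per basin-level shapefile,
    # mirroring the notebook's tables(..., re_pattern="lev(\d*)\.shp", partition="level")
    os.makedirs(out_dir, exist_ok=True)
    for root, _dirs, files in os.walk(extract_dir):
        for name in files:
            match = re.search(r"lev(\d+)\.shp$", name)
            if not match:
                continue
            level = match.group(1)
            gdf = gpd.read_file(os.path.join(root, name))
            gdf.columns = gdf.columns.str.lower()
            partition_dir = os.path.join(out_dir, f"level={level}")
            os.makedirs(partition_dir, exist_ok=True)
            gdf.to_csv(os.path.join(partition_dir, f"{level}.csv"), index=False)

if __name__ == "__main__":
    download(BASIN_URL, ZIP_PATH)
    with ZipFile(ZIP_PATH) as zf:
        zf.extractall(EXTRACT_DIR)
    partition_levels(EXTRACT_DIR)

Here os.makedirs(..., exist_ok=True) stands in for the notebook's try/except mkdir helper, and re.search is applied to the bare file name rather than the full path, which keeps the level capture independent of where the archive was extracted.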