Skip to content

Commit

Permalink
raster_benchmark.ipynb fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ChocopieKewpie committed Sep 11, 2024
1 parent 513abce commit 9a9d359
Showing 1 changed file with 187 additions and 42 deletions.
229 changes: 187 additions & 42 deletions raster_benchmark.ipynb
Original file line number Diff line number Diff line change
@@ -1,28 +1,58 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"### Generating Benchmark Data"
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"from joblib import Parallel, delayed\n",
"import numpy as np\n",
"from rasterio.enums import Resampling"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from Raster_benchmarking.generate_rasters import generate_nlm_rasters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Running Benchmarks- Run all cells, and just change the num_files_to_open variable to desired benchmark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"generate_nlm_rasters('./Raster_benchmarking/data', layers=100)"
"num_files_to_open = 100 #Change to desired input number <--------------!!!!!!!!!!!!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Generating Benchmark Data (seeded)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"generate_nlm_rasters('./Raster_benchmarking/data', layers=num_files_to_open)"
]
},
{
Expand All @@ -34,28 +64,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import pandas as pd\n",
"from joblib import Parallel, delayed\n",
"from Raster_benchmarking.DGGS_funcs import h3index_raster"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"discrete_dir = Path('Raster_benchmarking/data/discrete')\n",
"continuous_dir = Path('Raster_benchmarking/data/continuous')\n",
"output_dir = Path('Raster_benchmarking/data/dggs')\n",
"\n",
"num_files_to_open = 100 #Change to desired input number\n",
"os.makedirs(output_dir, exist_ok=True)\n",
"mode = lambda x: pd.Series.mode(x)[0]"
]
},
Expand All @@ -75,15 +100,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 172 ms\n",
"Wall time: 6.83 s\n"
"CPU times: total: 141 ms\n",
"Wall time: 4.32 s\n"
]
}
],
Expand All @@ -101,21 +126,20 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 109 ms\n",
"Wall time: 4.75 s\n"
"CPU times: total: 125 ms\n",
"Wall time: 4.84 s\n"
]
}
],
"source": [
"%%time\n",
"os.makedirs(output_dir, exist_ok=True)\n",
"_ = Parallel(n_jobs=-1)(delayed(h3index_raster)(file, output_dir, stem='discrete', operation=mode) for file in list(sorted(discrete_dir.glob('*.asc')))[:num_files_to_open])"
]
},
Expand All @@ -128,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -137,7 +161,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -154,15 +178,15 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Processing files: 100%|██████████| 100/100 [00:01<00:00, 53.11it/s]\n",
"Joining DataFrames: 100%|██████████| 99/99 [00:00<00:00, 1903.51it/s]\n"
"Processing files: 100%|██████████| 100/100 [00:01<00:00, 53.26it/s]\n",
"Joining DataFrames: 100%|██████████| 99/99 [00:00<00:00, 2189.52it/s]\n"
]
}
],
Expand All @@ -171,52 +195,173 @@
"cont_df = summing(cont_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Discrete files"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 30,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'DataFrame' object has no attribute 'h3'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[12], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mfinal_plotting\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcont_df\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32me:\\Work\\benchmarks\\dggsBenchmarks\\Raster_benchmarking\\DGGS_funcs.py:122\u001b[0m, in \u001b[0;36mfinal_plotting\u001b[1;34m(combined_df)\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfinal_plotting\u001b[39m(combined_df):\n\u001b[1;32m--> 122\u001b[0m h3_df \u001b[38;5;241m=\u001b[39m \u001b[43mcombined_df\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mh3\u001b[49m\u001b[38;5;241m.\u001b[39mh3_to_geo_boundary()\n\u001b[0;32m 124\u001b[0m \u001b[38;5;66;03m# Dissolve by 'class' and reproject to CRS 2193\u001b[39;00m\n\u001b[0;32m 125\u001b[0m h3_df \u001b[38;5;241m=\u001b[39m h3_df\u001b[38;5;241m.\u001b[39mdissolve(by\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclass\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mreset_index()\u001b[38;5;241m.\u001b[39mto_crs(\u001b[38;5;241m2193\u001b[39m)\n",
"\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'h3'"
"name": "stderr",
"output_type": "stream",
"text": [
"Processing files: 100%|██████████| 100/100 [00:01<00:00, 50.75it/s]\n",
"Joining DataFrames: 100%|██████████| 99/99 [00:00<00:00, 2106.20it/s]\n"
]
}
],
"source": [
"final_plotting(cont_df)"
"disc_df = classify(d_files, num_files_to_open, scale=100)\n",
"disc_df = summing(disc_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Discrete files"
"# Running Raster Benchmark"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"disc_df = classify(d_files, num_files_to_open, scale=100)\n",
"disc_df = summing(disc_df)"
"from Raster_benchmarking.Raster_funcs import process_rasters, compute"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"final_plotting(disc_df)"
"os.makedirs('Raster_benchmarking/data/raster', exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Stacking & joining files"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Continuous files"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 156 ms\n",
"Wall time: 960 ms\n"
]
}
],
"source": [
"%%time\n",
"process_rasters('Raster_benchmarking/data/continuous', num_files_to_open , output_file=f'Raster_benchmarking/data/raster/continuous_{num_files_to_open}.tif', nodata=np.iinfo(np.uint8).max, dtype=np.uint8, resampling=Resampling.nearest)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Discrete files"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 78.1 ms\n",
"Wall time: 333 ms\n"
]
}
],
"source": [
"%%time\n",
"process_rasters('Raster_benchmarking/data/discrete', num_files_to_open , output_file=f'Raster_benchmarking/data/raster/discrete_{num_files_to_open}.tif', nodata=np.iinfo(np.uint8).max, dtype=np.uint8, resampling=Resampling.nearest)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Classification"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Continuous files"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 297 ms\n",
"Wall time: 6.77 s\n"
]
}
],
"source": [
"%%time\n",
"compute(Path(f'Raster_benchmarking/data/raster/continuous_{num_files_to_open}.tif'), band_limit=num_files_to_open)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Discrete files"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 734 ms\n",
"Wall time: 7.52 s\n"
]
}
],
"source": [
"%%time\n",
"compute(Path(f'Raster_benchmarking/data/raster/discrete_{num_files_to_open}.tif'), band_limit=num_files_to_open)"
]
}
],
Expand Down

0 comments on commit 9a9d359

Please sign in to comment.