diff --git a/notebooks/2.0-test-normalization.ipynb b/notebooks/2.0-test-normalization.ipynb new file mode 100644 index 0000000..d703dce --- /dev/null +++ b/notebooks/2.0-test-normalization.ipynb @@ -0,0 +1,8697 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/freischem/.conda/envs/rs_tools_iti/lib/python3.11/site-packages/goes2go/data.py:519: FutureWarning: 'H' is deprecated and will be removed in a future version. Please use 'h' instead of 'H'.\n", + " within=pd.to_timedelta(config[\"nearesttime\"].get(\"within\", \"1H\")),\n", + "/home/freischem/.conda/envs/rs_tools_iti/lib/python3.11/site-packages/goes2go/NEW.py:188: FutureWarning: 'H' is deprecated and will be removed in a future version. Please use 'h' instead of 'H'.\n", + " within=pd.to_timedelta(config[\"nearesttime\"].get(\"within\", \"1H\")),\n" + ] + } + ], + "source": [ + "import autoroot\n", + "from rs_tools._src.preprocessing.normalize import normalize\n", + "\n", + "%matplotlib inline\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "goes_files = os.listdir('/mnt/disks/data/miniset/goes16/geoprocessed/')\n", + "goes_files = ['/mnt/disks/data/miniset/goes16/geoprocessed/' + f for f in goes_files]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# f = goes_files[0].split('/')[-1]\n", + "# f" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# ls /mnt/disks/data/miniset/goes16/geoprocessed/" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# get_list_filenames('/mnt/disks/data/miniset/goes16/geoprocessed/', ext='nc')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# from iti.data.geo_utils import get_split, get_list_filenames\n", + "\n", + "\n", + "# splits_dict = { \n", + "# \"train\": {\"years\": [2020], \"months\": [10], \"days\": list(range(1,20))},\n", + "# \"val\": {\"years\": [2020], \"months\": [10], \"days\": list(range(20,32))},\n", + "# }\n", + "\n", + "# get_split(goes_files, splits_dict['train'])\n", + " \n", + "\n", + "# # def get_files(self):\n", + "# # # Get filenames from data_dir\n", + "# # files = get_list_filenames(data_path=self.data_dir, ext=self.ext)\n", + "# # # split files based on split criteria\n", + "# # files = get_split(files=files, split_dict=self.splits_dict)\n", + "# # return files\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "# ds_goes_files = xr.open_mfdataset(goes_files[:100])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "goes_time = xr.open_dataset(goes_files[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'Rad' ()> Size: 4B\n",
+       "array(20.26424, dtype=float32)\n",
+       "Coordinates:\n",
+       "    band     int8 1B 16
" + ], + "text/plain": [ + " Size: 4B\n", + "array(20.26424, dtype=float32)\n", + "Coordinates:\n", + " band int8 1B 16" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# goes_time.Rad[15].std()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "ds_goes = normalize(goes_files[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "msg_files = os.listdir('/mnt/disks/data/miniset/msg/geoprocessed/')\n", + "msg_files = ['/mnt/disks/data/miniset/msg/geoprocessed/' + f for f in msg_files]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "ds_msg = normalize(msg_files[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "ds_goes = ds_goes.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "ds_msg = ds_msg.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 272B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    mean             (band) float32 64B 144.0 74.45 43.39 ... 89.34 96.57 85.23\n",
+       "    std              (band) float64 128B 97.21 80.72 53.49 ... 23.48 23.76 17.25
" + ], + "text/plain": [ + " Size: 272B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " mean (band) float32 64B 144.0 74.45 43.39 ... 89.34 96.57 85.23\n", + " std (band) float64 128B 97.21 80.72 53.49 ... 23.48 23.76 17.25" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_goes" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "from iti.data.editor import Editor\n", + "\n", + "import numpy as np\n", + "\n", + "class NormalizeEditor(Editor):\n", + " \"\"\"\n", + " Convert radiance values from mW/m^2/sr/cm^-1 to W/m^2/sr/um\n", + " \"\"\"\n", + " def __init__(self, norm_ds, key=\"data\"):\n", + " \"\"\"\n", + " Args:\n", + " norm_ds (xarray.Dataset): Dataset with normalization values\n", + " key (str): Key in dictionary to apply transformation\n", + " \"\"\"\n", + " self.key = key\n", + " self.norm = norm_ds\n", + "\n", + " def call(self, data_dict, **kwargs):\n", + " data = data_dict[self.key]\n", + " # use wavelengths and only normalise the bands that we have in the data\n", + " data_wavelengths = data_dict[\"wavelengths\"]\n", + " # Get indeces of bands to select\n", + " indeces = [np.where(self.norm.band_wavelength == wvl)[0][0] for wvl in data_wavelengths]\n", + " \n", + " # extract relevant means and stds\n", + " means = ds_goes['mean'][indeces].values\n", + " stds = ds_goes['std'][indeces].values\n", + "\n", + " # TODO apply normalization\n", + " data = \n", + "\n", + " # Convert units\n", + " # data = convert_units(data, wavelengths)\n", + " # Update dictionary\n", + " data_dict[self.key] = data\n", + " return data_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "data_wavelengths = np.array([0.47, 1.38, 1.61,])\n", + "indeces = [np.where(ds_goes.band_wavelength == wvl)[0][0] for wvl in data_wavelengths]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "means = ds_goes['mean'][indeces].values\n", + "stds = ds_goes['std'][indeces].values" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([97.20899093, 8.17400094, 6.69193914])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stds" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 3, 4]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_wavelengths = np.array([0.47, 1.38, 1.61,])\n", + "indexes = [np.where(ds_goes.band_wavelength == wvl)[0][0] for wvl in data_wavelengths]\n", + "indexes" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 220B\n",
+       "Dimensions:          (band_wavelength: 3, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 12B 0.47 1.38 1.61\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    mean             (band) float32 64B 144.0 74.45 43.39 ... 89.34 96.57 85.23\n",
+       "    std              (band) float64 128B 97.21 80.72 53.49 ... 23.48 23.76 17.25
" + ], + "text/plain": [ + " Size: 220B\n", + "Dimensions: (band_wavelength: 3, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 12B 0.47 1.38 1.61\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " mean (band) float32 64B 144.0 74.45 43.39 ... 89.34 96.57 85.23\n", + " std (band) float64 128B 97.21 80.72 53.49 ... 23.48 23.76 17.25" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_goes.isel(band_wavelength = indexes)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0]),)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.where(np.array([0.47, 1.38, 1.61,]) == 0.47)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "data = goes_time['Rad'][indeces]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 484B\n",
+       "Dimensions:          (band: 11, band_wavelength: 11)\n",
+       "Coordinates:\n",
+       "  * band             (band) <U6 264B 'IR_016' 'IR_039' ... 'WV_062' 'WV_073'\n",
+       "  * band_wavelength  (band_wavelength) float64 88B 1.64 3.92 8.7 ... 6.25 7.35\n",
+       "Data variables:\n",
+       "    mean             (band) float32 44B 1.814 0.6359 51.78 ... 3.091 3.14 13.84\n",
+       "    std              (band) float64 88B 1.735 0.2418 16.02 ... 0.9149 3.935
" + ], + "text/plain": [ + " Size: 484B\n", + "Dimensions: (band: 11, band_wavelength: 11)\n", + "Coordinates:\n", + " * band (band) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 440B\n",
+       "Dimensions:          (band: 11, band_wavelength: 11)\n",
+       "Coordinates:\n",
+       "  * band             (band) <U6 264B 'IR_016' 'IR_039' ... 'WV_062' 'WV_073'\n",
+       "  * band_wavelength  (band_wavelength) float64 88B 1.64 3.92 8.7 ... 6.25 7.35\n",
+       "Data variables:\n",
+       "    mean             (band) float32 44B 1.814 0.6359 51.78 ... 3.091 3.14 13.84\n",
+       "    std              (band) float32 44B 5.322 0.1253 417.6 ... 30.0 1.242 24.09
" + ], + "text/plain": [ + " Size: 440B\n", + "Dimensions: (band: 11, band_wavelength: 11)\n", + "Coordinates:\n", + " * band (band) xr.Dataset:\n", + " return ds.mean(spatial_variables)\n", + "\n", + "\n", + "files = goes_files[:10]\n", + "\n", + "temporal_variables = [\"time\"]\n", + "spatial_variables = [\"x\",\"y\"]\n", + "\n", + "preprocess_mean = partial(spatial_mean, spatial_variables=spatial_variables)\n", + "\n", + "# calculate mean\n", + "ds_mean = xr.open_mfdataset(files, preprocess=preprocess_mean, combine=\"by_coords\", engine=\"netcdf4\")\n", + "\n", + "# ds_mean = ds_mean.mean(temporal_variables)\n", + "\n", + "def preprocess_std(ds: xr.Dataset):\n", + " # calculate the mean\n", + " ds = ((ds - ds_mean)**2).std(spatial_variables)\n", + " return ds\n", + "\n", + "\n", + "ds_std = xr.open_mfdataset(goes_files[:10], preprocess=preprocess_std, combine=\"by_coords\", engine=\"netcdf4\")\n", + "\n", + "# ds_std = ds_std.mean(temporal_variables)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 275MB\n",
+       "Dimensions:          (x: 504, y: 3687, time: 1, band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * x                (x) float32 2kB 3.108e+06 3.109e+06 ... 3.611e+06 3.612e+06\n",
+       "  * y                (y) float32 15kB 501.3 1.503e+03 ... 3.693e+06 3.694e+06\n",
+       "  * time             (time) <U16 64B '2020-10-27 13:05'\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "    latitude         (y, x) float64 15MB ...\n",
+       "    longitude        (y, x) float64 15MB ...\n",
+       "    cloud_mask       (y, x) float32 7MB ...\n",
+       "Data variables:\n",
+       "    Rad              (band, y, x, time) float32 119MB -19.53 -13.84 ... -9.927\n",
+       "    DQF              (band, y, x, time) float32 119MB -0.0002013 ... 0.0
" + ], + "text/plain": [ + " Size: 275MB\n", + "Dimensions: (x: 504, y: 3687, time: 1, band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * x (x) float32 2kB 3.108e+06 3.109e+06 ... 3.611e+06 3.612e+06\n", + " * y (y) float32 15kB 501.3 1.503e+03 ... 3.693e+06 3.694e+06\n", + " * time (time) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 400B\n",
+       "Dimensions:          (time: 1, band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * time             (time) <U16 64B '2020-10-27 13:05'\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    Rad              (band, time) float64 128B 7.591e+03 5.06e+03 ... 229.4\n",
+       "    DQF              (band, time) float64 128B 0.0004025 0.0 ... 0.0 0.0
" + ], + "text/plain": [ + " Size: 400B\n", + "Dimensions: (time: 1, band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * time (time) \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 400B\n",
+       "Dimensions:          (band: 16, time: 1, band_wavelength: 16)\n",
+       "Coordinates:\n",
+       "  * time             (time) <U16 64B '2020-10-27 13:05'\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    Rad              (band, time) float64 128B 87.13 71.13 46.68 ... 20.72 15.15\n",
+       "    DQF              (band, time) float64 128B 0.02006 0.0 0.006225 ... 0.0 0.0
" + ], + "text/plain": [ + " Size: 400B\n", + "Dimensions: (band: 16, time: 1, band_wavelength: 16)\n", + "Coordinates:\n", + " * time (time) Size: 4B\n", + " array(0.11369764, dtype=float32),\n", + " Size: 4B\n", + " array(35742.094, dtype=float32))" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_std.Rad.min(), ds_std.Rad.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11523.1943" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1.15231943e+04" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 2kB\n",
+       "Dimensions:          (time: 10, band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * time             (time) <U16 640B '2020-10-07 17:05' ... '2020-10-30 15:05'\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    Rad              (band, time) float32 640B 1.152e+04 2.916e+04 ... 535.9\n",
+       "    DQF              (band, time) float32 640B 0.04563 0.2669 ... 0.0 0.0
" + ], + "text/plain": [ + " Size: 2kB\n", + "Dimensions: (time: 10, band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * time (time) 1\u001b[0;31m \u001b[0mds_std\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds_std\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'Rad'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'std'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mds_mean\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mds_mean\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'Rad'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'mean'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mds_std\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name_dict, **names)\u001b[0m\n\u001b[1;32m 4284\u001b[0m \u001b[0mDataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrename_vars\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4285\u001b[0m \u001b[0mDataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrename_dims\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4286\u001b[0m \u001b[0mDataArray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrename\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4287\u001b[0m \"\"\"\n\u001b[0;32m-> 4288\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rename\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/dataset.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name_dict, **names)\u001b[0m\n\u001b[1;32m 4222\u001b[0m \"\"\"\n\u001b[1;32m 4223\u001b[0m \u001b[0mname_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0meither_dict_or_kwargs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_dict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnames\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"rename\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4224\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mname_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4225\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdims\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4226\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 4227\u001b[0m \u001b[0;34mf\"cannot rename {k!r} because it is not a \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4228\u001b[0m \u001b[0;34m\"variable or dimension in this dataset\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4229\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: cannot rename 'Rad' because it is not a variable or dimension in this dataset" + ] + } + ], + "source": [ + "ds_std = ds_std.rename({'Rad':'std'})\n", + "ds_mean = ds_mean.rename({'Rad':'mean'})" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "ds_mean = ds_mean.drop_vars([])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 144B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    mean             (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 144B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " mean (band) float32 64B dask.array" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_mean" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 480B\n",
+       "Dimensions:          (band_wavelength: 16, band: 32)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 32B 1 2 3 4 5 6 7 8 ... 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    mean             (band) float32 128B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    std              (band) float32 128B dask.array<chunksize=(24,), meta=np.ndarray>\n",
+       "    DQF              (band) float32 128B dask.array<chunksize=(24,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 480B\n", + "Dimensions: (band_wavelength: 16, band: 32)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 32B 1 2 3 4 5 6 7 8 ... 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " mean (band) float32 128B dask.array\n", + " std (band) float32 128B dask.array\n", + " DQF (band) float32 128B dask.array" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xr.combine_nested([ds_mean, ds_std], concat_dim='band')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.combine_by_coords([ds_mean, ds_std])" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 272B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    std              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    DQF              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    mean             (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 272B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " std (band) float32 64B dask.array\n", + " DQF (band) float32 64B dask.array\n", + " mean (band) float32 64B dask.array" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/tmp/ipykernel_17352/2147361793.py:3: DeprecationWarning: dropping variables using `drop` is deprecated; use drop_vars.\n", + " ds = ds.drop([v for v in ds.var() if v not in ['std', 'mean']])\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    std              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    mean             (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " std (band) float32 64B dask.array\n", + " mean (band) float32 64B dask.array" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = ds_std.rename({'Rad':'std'})\n", + "\n", + "ds = ds.assign(mean=ds_mean.Rad)\n", + "ds = ds.drop([v for v in ds.var() if v not in ['std', 'mean']])\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/tmp/ipykernel_17352/846106241.py:1: DeprecationWarning: dropping variables using `drop` is deprecated; use drop_vars.\n", + " ds.drop([v for v in ds.var() if v not in ['std', 'mean']])\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    std              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    mean             (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " std (band) float32 64B dask.array\n", + " mean (band) float32 64B dask.array" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.drop([v for v in ds.var() if v not in ['std', 'mean']])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    Rad              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    DQF              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " Rad (band) float32 64B dask.array\n", + " DQF (band) float32 64B dask.array" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_std" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Could not find any dimension coordinates to use to order the datasets for concatenation", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mxr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcombine_by_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mds_mean\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mds_std\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/combine.py:958\u001b[0m, in \u001b[0;36mcombine_by_coords\u001b[0;34m(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)\u001b[0m\n\u001b[1;32m 954\u001b[0m grouped_by_vars \u001b[38;5;241m=\u001b[39m itertools\u001b[38;5;241m.\u001b[39mgroupby(sorted_datasets, key\u001b[38;5;241m=\u001b[39mvars_as_keys)\n\u001b[1;32m 956\u001b[0m \u001b[38;5;66;03m# Perform the multidimensional combine on each group of data variables\u001b[39;00m\n\u001b[1;32m 957\u001b[0m \u001b[38;5;66;03m# before merging back together\u001b[39;00m\n\u001b[0;32m--> 958\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 962\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 963\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 964\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 965\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 966\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 967\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 968\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mvars\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mgrouped_by_vars\u001b[49m\n\u001b[1;32m 969\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 972\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 973\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 976\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 977\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/combine.py:959\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 954\u001b[0m grouped_by_vars \u001b[38;5;241m=\u001b[39m itertools\u001b[38;5;241m.\u001b[39mgroupby(sorted_datasets, key\u001b[38;5;241m=\u001b[39mvars_as_keys)\n\u001b[1;32m 956\u001b[0m \u001b[38;5;66;03m# Perform the multidimensional combine on each group of data variables\u001b[39;00m\n\u001b[1;32m 957\u001b[0m \u001b[38;5;66;03m# before merging back together\u001b[39;00m\n\u001b[1;32m 958\u001b[0m concatenated_grouped_by_data_vars \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(\n\u001b[0;32m--> 959\u001b[0m \u001b[43m_combine_single_variable_hypercube\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets_with_same_vars\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 962\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_vars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_vars\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 963\u001b[0m \u001b[43m \u001b[49m\u001b[43mcoords\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcoords\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 964\u001b[0m \u001b[43m \u001b[49m\u001b[43mcompat\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcompat\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 965\u001b[0m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 966\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_attrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcombine_attrs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 967\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 968\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28mvars\u001b[39m, datasets_with_same_vars \u001b[38;5;129;01min\u001b[39;00m grouped_by_vars\n\u001b[1;32m 969\u001b[0m )\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m merge(\n\u001b[1;32m 972\u001b[0m concatenated_grouped_by_data_vars,\n\u001b[1;32m 973\u001b[0m compat\u001b[38;5;241m=\u001b[39mcompat,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 976\u001b[0m combine_attrs\u001b[38;5;241m=\u001b[39mcombine_attrs,\n\u001b[1;32m 977\u001b[0m )\n", + "File \u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/combine.py:619\u001b[0m, in \u001b[0;36m_combine_single_variable_hypercube\u001b[0;34m(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)\u001b[0m\n\u001b[1;32m 613\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 614\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 615\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAt least one Dataset is required to resolve variable names \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor combined hypercube.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 617\u001b[0m )\n\u001b[0;32m--> 619\u001b[0m combined_ids, concat_dims \u001b[38;5;241m=\u001b[39m \u001b[43m_infer_concat_order_from_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdatasets\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 622\u001b[0m \u001b[38;5;66;03m# check that datasets form complete hypercube\u001b[39;00m\n\u001b[1;32m 623\u001b[0m _check_shape_tile_ids(combined_ids)\n", + "File \u001b[0;32m/opt/conda/envs/rs_tools/lib/python3.10/site-packages/xarray/core/combine.py:144\u001b[0m, in \u001b[0;36m_infer_concat_order_from_coords\u001b[0;34m(datasets)\u001b[0m\n\u001b[1;32m 139\u001b[0m tile_ids \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 140\u001b[0m tile_id \u001b[38;5;241m+\u001b[39m (position,) \u001b[38;5;28;01mfor\u001b[39;00m tile_id, position \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(tile_ids, order)\n\u001b[1;32m 141\u001b[0m ]\n\u001b[1;32m 143\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(datasets) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m concat_dims:\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 145\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not find any dimension coordinates to use to \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morder the datasets for concatenation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 147\u001b[0m )\n\u001b[1;32m 149\u001b[0m combined_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(tile_ids, datasets))\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m combined_ids, concat_dims\n", + "\u001b[0;31mValueError\u001b[0m: Could not find any dimension coordinates to use to order the datasets for concatenation" + ] + } + ], + "source": [ + "ds = xr.combine_by_coords([ds_mean, ds_std])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import xarray as xr" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "from functools import partial\n", + "from typing import List\n", + "\n", + "def spatial_mean(ds: xr.Dataset, spatial_variables: List[str]) -> xr.Dataset:\n", + " return ds.mean(spatial_variables)\n", + "\n", + "preprocess = partial(spatial_mean, spatial_variables=['x', 'y'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "ds_mean = xr.open_mfdataset(goes_files[:100], preprocess=preprocess, combine=\"by_coords\", engine=\"netcdf4\")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 208B\n",
+       "Dimensions:          (band_wavelength: 16, band: 16)\n",
+       "Coordinates:\n",
+       "  * band_wavelength  (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n",
+       "  * band             (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n",
+       "Data variables:\n",
+       "    Rad              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>\n",
+       "    DQF              (band) float32 64B dask.array<chunksize=(8,), meta=np.ndarray>
" + ], + "text/plain": [ + " Size: 208B\n", + "Dimensions: (band_wavelength: 16, band: 16)\n", + "Coordinates:\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 12.27 13.27\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16\n", + "Data variables:\n", + " Rad (band) float32 64B dask.array\n", + " DQF (band) float32 64B dask.array" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_mean.mean(['time'])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "ds = xr.open_mfdataset(goes_files, combine=\"by_coords\", engine=\"netcdf4\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'Rad' (band: 16)> Size: 64B\n",
+       "dask.array<mean_agg-aggregate, shape=(16,), dtype=float32, chunksize=(8,), chunktype=numpy.ndarray>\n",
+       "Coordinates:\n",
+       "  * band     (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
" + ], + "text/plain": [ + " Size: 64B\n", + "dask.array\n", + "Coordinates:\n", + " * band (band) int8 16B 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.mean(['x', 'y']).mean(['time']).Rad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rs_tools_iti", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rs_tools/_src/preprocessing/normalize.py b/rs_tools/_src/preprocessing/normalize.py index 75b7c86..d326191 100644 --- a/rs_tools/_src/preprocessing/normalize.py +++ b/rs_tools/_src/preprocessing/normalize.py @@ -2,6 +2,7 @@ import xarray as xr from pathlib import Path from functools import partial +import numpy as np def spatial_mean(ds: xr.Dataset, spatial_variables: List[str]) -> xr.Dataset: @@ -21,24 +22,26 @@ def normalize( ds_mean = ds_mean.mean(temporal_variables) - def preprocess(ds): - # calculate the mean - ds = ((ds - ds_mean)**2).std([spatial_variables]) + def preprocess(ds: xr.Dataset): + # calculate the std + # ds = ((ds - ds_mean)**2).std(spatial_variables) + N = ds.x.size * ds.y.size + ds = np.sqrt(((ds - ds_mean) ** 2).sum(['x','y']) / N) return ds ds_std = xr.open_mfdataset(files, preprocess=preprocess, combine="by_coords", engine="netcdf4") ds_std = ds_std.mean(temporal_variables) - ds = xr.combine_by_coords([ds_mean, ds_std]) - return ds + ds_mean = ds_mean.rename({'Rad':'mean'}) + ds_std = ds_std.rename({'Rad':'std'}) -# TODO: Finish normalizers -def apply_spectral_normalizer(radiances: xr.DataArray, band_norm: xr.Dataset) -> xr.DataArray: - pass + # Drop any variables that are not used (e.g. DQF for GOES) + ds_mean = ds_mean.drop_vars([v for v in ds_mean.var() if v not in ['std', 'mean']]) + ds_std = ds_std.drop_vars([v for v in ds_std.var() if v not in ['std', 'mean']]) -def apply_coordinate_normalizer(coords: xr.DataArray) -> xr.DataArray: - pass + ds = xr.combine_by_coords([ds_mean, ds_std]) + return ds