From 54c4e07e0307211feb3db259d298816e5c11b2f4 Mon Sep 17 00:00:00 2001 From: ghiggi Date: Tue, 15 Aug 2023 17:53:10 +0200 Subject: [PATCH] Add radar retrievals and manipulation utility --- gpm_api/accessor/methods.py | 148 ++++-- .../dataset/retrievals/retrieval_2a_radar.py | 449 +++++++++++++++++- gpm_api/dataset/retrievals/routines.py | 6 +- gpm_api/utils/manipulations.py | 192 +++++++- 4 files changed, 748 insertions(+), 47 deletions(-) diff --git a/gpm_api/accessor/methods.py b/gpm_api/accessor/methods.py index 07763c50..c15dc0c5 100644 --- a/gpm_api/accessor/methods.py +++ b/gpm_api/accessor/methods.py @@ -101,6 +101,7 @@ def collocate( chunks=chunks, ) + #### Profile utility def get_variable_at_bin(self, bin, variable=None): """Retrieve variable values at specific range bins.""" from gpm_api.utils.manipulations import get_variable_at_bin @@ -113,12 +114,43 @@ def get_height_at_bin(self, bin): return get_height_at_bin(self._obj, bin=bin) - def select_range_with_valid_data(self, variable): + def slice_range_with_valid_data(self, variable=None): """Select the 'range' interval with valid data.""" - from gpm_api.utils.manipulations import select_range_with_valid_data + from gpm_api.utils.manipulations import slice_range_with_valid_data - return select_range_with_valid_data(self._obj, variable) + return slice_range_with_valid_data(self._obj, variable=variable) + def slice_range_where_values(self, variable=None, vmin=-np.inf, vmax=np.inf): + """Select the 'range' interval where values are within the [vmin, vmax] interval.""" + from gpm_api.utils.manipulations import slice_range_where_values + + return slice_range_where_values(self._obj, variable=variable, vmin=vmin, vmax=vmax) + + def slice_range_at_height(self, height): + """Slice the 3D array at a given height.""" + from gpm_api.utils.manipulations import slice_range_at_height + + return slice_range_at_height(self._obj, height=height) + + def slice_range_at_value(self, value, variable=None): + """Slice the 3D arrays where the variable values are close to value.""" + from gpm_api.utils.manipulations import slice_range_at_value + + return slice_range_at_value(self._obj, variable=variable, value=value) + + def slice_range_at_max_value(self, variable=None): + """Slice the 3D arrays where the variable values are at maximum.""" + from gpm_api.utils.manipulations import slice_range_at_max_value + + return slice_range_at_max_value(self._obj, variable=variable) + + def slice_range_at_min_value(self, variable=None): + """Slice the 3D arrays where the variable values are at minimum.""" + from gpm_api.utils.manipulations import slice_range_at_min_value + + return slice_range_at_min_value(self._obj, variable=variable) + + #### Dataset utility @property def is_orbit(self): from gpm_api.checks import is_orbit @@ -167,6 +199,31 @@ def frequency_variables(self): return get_frequency_variables(self._obj) + @property + def start_time(self): + from gpm_api.io.checks import check_time + + if "time" in self._obj.coords: + start_time = self._obj["time"].values[0] + elif "gpm_time" in self._obj.coords: + start_time = self._obj["gpm_time"].values[0] + else: + raise ValueError("Time coordinate not found") + return check_time(start_time) + + @property + def end_time(self): + from gpm_api.io.checks import check_time + + if "time" in self._obj.coords: + end_time = self._obj["time"].values[-1] + elif "gpm_time" in self._obj.coords: + end_time = self._obj["gpm_time"].values[-1] + else: + raise ValueError("Time coordinate not found") + return check_time(end_time) + + #### Dataset Quality Checks @property def is_regular(self): from gpm_api.utils.checks import is_regular @@ -197,30 +254,7 @@ def has_valid_geolocation(self): return has_valid_geolocation(self._obj) - @property - def start_time(self): - from gpm_api.io.checks import check_time - - if "time" in self._obj.coords: - start_time = self._obj["time"].values[0] - elif "gpm_time" in self._obj.coords: - start_time = self._obj["gpm_time"].values[0] - else: - raise ValueError("Time coordinate not found") - return check_time(start_time) - - @property - def end_time(self): - from gpm_api.io.checks import check_time - - if "time" in self._obj.coords: - end_time = self._obj["time"].values[-1] - elif "gpm_time" in self._obj.coords: - end_time = self._obj["gpm_time"].values[-1] - else: - raise ValueError("Time coordinate not found") - return check_time(end_time) - + #### Subsetting utility def subset_by_time(self, start_time=None, end_time=None): from gpm_api.utils.time import subset_by_time @@ -256,6 +290,7 @@ def get_slices_regular(self, min_size=2): return get_slices_regular(self._obj, min_size=min_size) + #### Plotting utility def plot_transect_line(self, ax=None, color="black"): from gpm_api.visualization.profile import plot_transect_line @@ -390,17 +425,49 @@ def plot_image( ) return p + def plot_transect( + self, + variable, + ax=None, + add_colorbar=True, + zoom=True, + fig_kwargs={}, + cbar_kwargs={}, + **plot_kwargs, + ): + from gpm_api.visualization.profile import plot_transect + + da = self._obj[variable] + p = plot_transect( + da, + ax=ax, + add_colorbar=add_colorbar, + zoom=zoom, + fig_kwargs=fig_kwargs, + cbar_kwargs=cbar_kwargs, + **plot_kwargs, + ) + return p + def available_retrievals(self): """Available GPM-API retrievals for that GPM product.""" from gpm_api.dataset.retrievals.routines import available_retrievals return available_retrievals(self._obj) - def retrieve(self, name): + def retrieve(self, name, **kwargs): """Retrieve a GPM-API variable.""" from gpm_api.dataset.retrievals.routines import get_retrieval_variable - return get_retrieval_variable(self._obj, retrieval=name) + return get_retrieval_variable(self._obj, retrieval=name, **kwargs) + + def slice_range_at_temperature(self, temperature, variable_temperature="airTemperature"): + """Slice the 3D arrays along a specific isotherm.""" + from gpm_api.utils.manipulations import slice_range_at_temperature + + return slice_range_at_temperature( + self._obj, temperature=temperature, variable_temperature=variable_temperature + ) @xr.register_dataarray_accessor("gpm_api") @@ -490,6 +557,29 @@ def plot_image( ) return p + def plot_transect( + self, + ax=None, + add_colorbar=True, + zoom=True, + fig_kwargs={}, + cbar_kwargs={}, + **plot_kwargs, + ): + from gpm_api.visualization.profile import plot_transect + + da = self._obj + p = plot_transect( + da, + ax=ax, + add_colorbar=add_colorbar, + zoom=zoom, + fig_kwargs=fig_kwargs, + cbar_kwargs=cbar_kwargs, + **plot_kwargs, + ) + return p + def integrate_profile_concentration(self, name, scale_factor=None, units=None): from gpm_api.utils.manipulations import integrate_profile_concentration diff --git a/gpm_api/dataset/retrievals/retrieval_2a_radar.py b/gpm_api/dataset/retrievals/retrieval_2a_radar.py index 81737f09..9f2b7060 100644 --- a/gpm_api/dataset/retrievals/retrieval_2a_radar.py +++ b/gpm_api/dataset/retrievals/retrieval_2a_radar.py @@ -4,24 +4,457 @@ @author: ghiggi """ +import numpy as np +import xarray as xr +from gpm_api.utils.manipulations import ( + get_bright_band_mask, + get_dims_without, + get_height_at_temperature, + get_liquid_phase_mask, + get_range_axis, + get_solid_phase_mask, + get_variable_dataarray, +) -def retrieve_binClutterFreeBottomHeight(ds): + +def retrieve_heightClutterFreeBottom(ds): """Retrieve clutter height.""" da = ds.gpm_api.get_height_at_bin(bin="binClutterFreeBottom") - da.name = "binClutterFreeBottomHeight" + da.name = "heightClutterFreeBottom" return da -def retrieve_binRealSurfaceHeightKa(ds): - """Retrieve bin height of real surface at Ka band.""" +def retrieve_heightRealSurfaceKa(ds): + """Retrieve height of real surface at Ka band.""" da = ds.gpm_api.get_height_at_bin(bin=ds["binRealSurface"].sel({"radar_frequency": "Ka"})) - da.name = "binRealSurfaceHeightKa" + da.name = "heightRealSurfaceKa" return da -def retrieve_binRealSurfaceHeightKu(ds): - """Retrieve bin height of real surface at Ku band.""" +def retrieve_heightRealSurfaceKu(ds): + """Retrieve height of real surface at Ku band.""" da = ds.gpm_api.get_height_at_bin(bin=ds["binRealSurface"].sel({"radar_frequency": "Ku"})) - da.name = "binRealSurfaceHeightKu" + da.name = "heightRealSurfaceKu" return da + + +def retrieve_precipitationType(xr_obj, method="major_rain_type"): + """Retrieve major rain type from the 2A- typePrecip variable.""" + da = get_variable_dataarray(xr_obj, variable="typePrecip") + available_methods = ["major_rain_type"] + if method not in available_methods: + raise NotImplementedError(f"Implemented methods are {available_methods}") + # Decode typePrecip + # if method == "major_rain_type" + da = da / 10000000 + da = da.astype(int) + da.attrs["flag_values"] = [0, 1, 2, 3] + da.attrs["flag_meanings"] = ["no rain", "stratiform", "convective", "other"] + da.attrs["description"] = "Precipitation type" + return da + + +def retrieve_REFC( + ds, + variable="zFactorFinal", + radar_frequency="Ku", + mask_bright_band=False, + mask_solid_phase=False, + mask_liquid_phase=False, +): + """Retrieve the vertical maximum radar reflectivity in the column. + + Also called: Composite REFlectivity + """ + if mask_solid_phase and mask_liquid_phase: + raise ValueError("Either specify 'mask_solid_phase' or 'mask_liquid_phase'.") + # Retrieve required DataArrays + da = get_variable_dataarray(ds, variable=variable) + if len(da["radar_frequency"].data) != 1: + da = da.sel({"radar_frequency": radar_frequency}) + # Mask bright band region + if mask_bright_band: + da_bright_band = get_bright_band_mask(ds) + da = da.where(~da_bright_band) + # Mask ice phase region + if mask_solid_phase: + da_mask = get_solid_phase_mask(ds) + da = da.where(da_mask) + # Mask liquid phase region + if mask_liquid_phase: + da_mask = get_liquid_phase_mask(ds) + da = da.where(da_mask) + # Compute maximum + da_max = da.max(dim="range") + # Add attributes + if mask_solid_phase: + da_max.name = "REFC_liquid" + elif mask_liquid_phase: + da_max.name = "REFC_solid" + else: + da_max.name = "REFC" + da_max.attrs["units"] = "dBZ" + return da_max + + +def retrieve_REFCH(ds, variable="zFactorFinal", radar_frequency="Ku"): + """Retrieve the height at which the maximum radar reflectivity is observed. + + Also called: Composite REFlectivity Height + """ + # Retrieve required DataArrays + da = get_variable_dataarray(ds, variable=variable) + if len(da["radar_frequency"].data) != 1: + da = da.sel({"radar_frequency": radar_frequency}) + da_height = ds["height"] + + # Compute maximum reflectivity height + # - Need to use a fillValue because argmax fails if all nan along column + # - Need to compute argmax becose not possible to isel with 1D array with dask + da_all_na = np.isnan(da).all("range") + da_argmax = da.fillna(-10).argmax(dim="range", skipna=True) + da_argmax = da_argmax.compute() + da_max_height = da_height.isel({"range": da_argmax}) + da_max_height = da_max_height.where(~da_all_na) + + # Add attributes + da_max_height.name = "REFCH" + da_max_height.attrs["description"] = "Composite REFlectivity Height " + da_max_height.attrs["units"] = "dBZ" + return da_max_height + + +def retrieve_EchoDepth( + ds, + threshold, + variable="zFactorFinal", + radar_frequency="Ku", + min_threshold=0, + mask_liquid_phase=False, +): + """Retrieve Echo Depth with reflectivity above xx dBZ. + + Common thresholds are 18, 30, 50, 60 dbZ. + """ + # Retrieve required DataArrays + da = get_variable_dataarray(ds, variable=variable) + if len(da["radar_frequency"].data) != 1: + da = da.sel({"radar_frequency": radar_frequency}) + da_height = ds["height"].copy() + # Mask height bin where not raining + da_mask_3d_rain = da > min_threshold + da_height = da_height.where(da_mask_3d_rain) + + # Mask heights where Z is not above threshold + da_mask_3d = da > threshold + da_height_masked = da_height.where(da_mask_3d) + + # Mask liquid phase + if mask_liquid_phase: + da_liquid_mask = get_liquid_phase_mask(ds) + da_height_masked = da_height_masked.where(~da_liquid_mask) + + # Retrieve min and max echo height + da_max_height = da_height_masked.max(dim="range") + da_min_height = da_height_masked.min(dim="range") + + # OLD MASKING + # if mask_liquid_phase: + # da_isnan = np.isnan(da_min_height) + # da_height_melting = ds["heightZeroDeg"] + # da_height_melting = da_height_melting.where(~da_isnan) + # # If max is below the 0 °C isotherm --> set to nan + # da_max_height = da_max_height.where(da_max_height > da_height_melting) + # # If min below the 0 °C isoterm --> set the isotherm height + # da_min_height = da_min_height.where(da_min_height > da_height_melting, da_height_melting) + + # Compute depth + da_depth = da_max_height - da_min_height + + # Add attributes + da_depth.name = f"EchoDepth{threshold}dBZ" + da_depth.attrs["units"] = "m" + return da_depth + + +def retrieve_EchoTopHeight( + ds, threshold, variable="zFactorFinal", radar_frequency="Ku", min_threshold=0 +): + """Retrieve Echo Top Height (maximum altitude) for a particular reflectivity threshold. + + Common thresholds are 18, 30, 50, 60 dbZ. + References: Delobbe and Holleman, 2006; Stefan and Barbu, 2018 + """ + # Retrieve required DataArrays + da = get_variable_dataarray(ds, variable=variable) + if len(da["radar_frequency"].data) != 1: + da = da.sel({"radar_frequency": radar_frequency}) + da_height = ds["height"].copy() + + # Mask height bin where not raining + da_mask_3d_rain = da > min_threshold + da_height = da_height.where(da_mask_3d_rain) + + # Mask heights where Z is not above threshold + da_mask_3d = da > threshold + da_height_masked = da_height.where(da_mask_3d) + + # Retrieve max echo top height + da_max_height = da_height_masked.max(dim="range") + + # Add attributes + da_max_height.name = f"ETH{threshold}dBZ" + da_max_height.attrs["units"] = "m" + return da_max_height + + +def retrieve_VIL(ds, variable="zFactorFinal", radar_frequency="Ku"): + """Compute Vertically Integrated Liquid indicator. + + Represents the total amount of rain that would fall if all the liquid water + in a column inside a rain cloud (usually a thunderstorm) would be + brought to the surface. + + Reference: + Greene, D.R., and R.A. Clark, 1972: + Vertically integrated liquid water–A new analysis tool. + Mon. Wea. Rev., 100, 548-552. + Amburn and Wolf (1997) + """ + da = ds[variable].sel({"radar_frequency": radar_frequency}) + heights_arr = np.asanyarray(da["range"].data) + da_mask = np.isnan(da).all(dim="range") + + # Compute the thickness of each level (difference between adjacent heights) + thickness_arr = np.diff(heights_arr) + + # Compute average Z between range bins [in mm^6/m-3] + da_z = 10 ** (da / 10) # Takes 2.5 seconds per granule + n_ranges = len(da["range"]) + z_below = da_z.isel(range=slice(0, n_ranges - 1)).data + z_above = da_z.isel(range=slice(1, n_ranges)).data + z_avg_arr = (z_below + z_above) / 2 + + # Clip reflectivity values at 56 dBZ + z_avg_arr = z_avg_arr.clip(max=10 ** (56 / 10)) + + # Compute VIL profile + thickness_arr = np.broadcast_to(thickness_arr, z_avg_arr.shape) + vil_profile_arr = (z_avg_arr ** (4 / 7)) * thickness_arr # Takes 3.8 s seconds per granule + + # Compute VIL + range_axis = get_range_axis(da) + dims = get_dims_without(da, dims=["range"]) + scale_factor = 3.44 * 10**-6 + vil_arr = scale_factor * vil_profile_arr.sum(axis=range_axis) # DataArray.sum is very slow ! + da_vil = xr.DataArray(vil_arr, dims=dims) + + # Mask where input profile is all Nan + da_vil = da_vil.where(~da_mask) + + # Add attributes + da_vil.name = "VIL" + da_vil.attrs["description"] = "Radar-derived estimate of liquid water in a vertical column" + da_vil.attrs["units"] = "kg/m2" + return da_vil + + +def retrieve_VILD( + ds, variable="zFactorFinal", radar_frequency="Ku", threshold=18, use_echo_top=True +): + """Compute Vertically Integrated Liquid Density. + + VIL Density = VIL/Echo Top Height + + By default, the Echo Top Height (or Echo Depth) is computed for 18 dBZ. + More info at https://www.weather.gov/lmk/vil_density + + """ + da_vil = retrieve_VIL(ds, variable=variable, radar_frequency="Ku") + if use_echo_top: + da_e = retrieve_EchoTopHeight( + ds, + threshold=threshold, + variable=variable, + radar_frequency=radar_frequency, + min_threshold=0, + ) + else: + da_e = retrieve_EchoDepth( + ds, + threshold=threshold, + variable=variable, + radar_frequency=radar_frequency, + min_threshold=0, + ) + da_vild = da_vil / da_e * 1000 + # Add attributes + da_vild.name = "VILD" + da_vild.attrs["description"] = "VIL Density" + da_vild.attrs["units"] = "g/m3" + return da_vild + + +def _get_weights(da, lower_threshold, upper_threshold): + """Compute weights using a linear weighting function.""" + da_mask_nan = np.isnan(da) + da_mask_lower = da < lower_threshold + da_mask_upper = da > upper_threshold + da_weights = (da - lower_threshold) / (upper_threshold - lower_threshold) + da_weights = da_weights.where(~da_mask_lower, 0) + da_weights = da_weights.where(~da_mask_upper, 1) + da_weights = da_weights.where(~da_mask_nan) + da_weights.name = "weights" + return da_weights + + +def retrieve_HailKineticEnergy( + ds, variable="zFactorFinal", radar_frequency="Ku", lower_threshold=40, upper_threshold=50 +): + """Compute Hail Kinetic Energy. + + Lower and upper reflectivity thresholds are used to retain only + higher reflectivities typically associated with hail and filtering out + most of the lower reflectivities typically associated with liquid water. + """ + # Retrieve required DataArrays + da_z = ds[variable].sel({"radar_frequency": radar_frequency}) + + # Compute W(Z) + # - Used to define a transition zone between rain and hail reflectivities + da_z_weighted = _get_weights( + da_z, lower_threshold=lower_threshold, upper_threshold=upper_threshold + ) + # Compute Hail Kinetic Energy + scale_factor = 5 * (10**-6) + da_e = scale_factor * 10 ** (0.084 * da_z) * da_z_weighted # J/m2 + + da_e.name = "HailKineticEnergy" + da_e.attrs["description"] = "Hail kinetic energy" + da_e.attrs["units"] = "J/m2/s" + return da_e + + +def retrieve_SHI( + ds, variable="zFactorFinal", radar_frequency="Ku", lower_z_threshold=40, upper_z_threshold=50 +): + """ + Retrieve the Severe Hail Index (SHI). + + SHI is used to compute the Probability of Severe Hail (POSH) and Maximum Estimated Size of Hail (MESH). + SHI applies a thermally weighted vertical integration of reflectivity from the melting level + to the top of the storm, neglecting any reflectivity less than 40 dBZ, + thereby attempting to capture only the ice content of a storm. + + Reference: Witt et al., 1998 + + Parameters + ---------- + ds : TYPE + DESCRIPTION. + variable : str, optional + Reflectivity field. The default is "zFactorFinal". + radar_frequency : str, optional + Radar frequency. The default is "Ku". + lower_z_threshold : (int, float), optional + Lower reflectivity threshold. The default is 40 dBZ. + upper_z_threshold : (int, float), optional + Upper reflectivity threshold. The default is 50 dBZ. + + Returns + ------- + da_shi : xr.DataArray + Severe Hail Index (SHI) + """ + # Retrieve required DataArrays + da_z = ds[variable].sel({"radar_frequency": radar_frequency}) + da_t = ds["airTemperature"] + da_height = ds["height"] + da_mask = np.isnan(da_z).all(dim="range") + + # Compute W(T) + # - Used to define a transition zone between rain and hail reflectivities + # - Hail growth only occurs at temperatures < 0°C + # - Most growth for severe hail occurs at temperatures near -20°C or colder + da_height_zero_deg = get_height_at_temperature( + da_height=da_height, da_temperature=da_t, temperature=273.15 + ) # 2.5 s per granule + da_height_minus_20_deg = get_height_at_temperature( + da_height=da_height, da_temperature=da_t, temperature=273.15 - 20 + ) # 2.5 s per granule + + da_t_weighted = _get_weights( + da_height, lower_threshold=da_height_zero_deg, upper_threshold=da_height_minus_20_deg + ) # 14 s per granule + # Compute HailKineticEnergy + da_e = retrieve_HailKineticEnergy( + ds, + variable=variable, + radar_frequency=radar_frequency, + lower_threshold=lower_z_threshold, + upper_threshold=upper_z_threshold, + ) # 12 s per granule + + # Define thickness array (difference between adjacent heights) + heights_arr = np.asanyarray(ds["range"].data) + thickness_arr = np.diff(heights_arr) + thickness_arr = np.append(thickness_arr, thickness_arr[-1]) + thickness_arr = np.broadcast_to(thickness_arr, da_e.shape) + + # Compute SHI + da_shi_profile = da_e * da_t_weighted * thickness_arr # 3.45 s per granule + da_shi_profile = da_shi_profile.where(da_height > da_height_zero_deg) # 4.5 s per granule + da_shi = 0.1 * da_shi_profile.sum("range") # 4.3 s (< 1s with numpy.sum !) + + # Mask where input profile is all Nan + da_shi = da_shi.where(~da_mask) + + # Add attributes + da_shi.name = "SHI" + da_shi.attrs["description"] = "Severe Hail Index " + da_shi.attrs["units"] = "J/m/s" + return da_shi + + +def retrieve_MESH(ds): + """Retrieve the Maximum Estimated Size of Hail (MESH). + + Also known as the Maximum Expected Hail Size (MEHS). + + The “size” in MESH refers to the maximum diameter (in mm) of a hailstone. + It's an indicator that transforms SHI into hail size by fitting SHI to a chosen + percentile of maximum observed hail size (using a power-law) + """ + da_shi = retrieve_SHI(ds) + da_mesh = 2.54 * da_shi**0.5 + # Add attributes + da_mesh.name = "MESH" + da_mesh.attrs["description"] = "Maximum Estimated Size of Hail" + da_mesh.attrs["units"] = "mm" + return da_mesh + + +def retrieve_POSH(ds): + """The Probability of Severe Hail (POSH). + + The probability of 0.75-inch diameter hail occurring. + + When SHI = WT, POSH = 50%. + Output probabilities are rounded off to the nearest 10%, to avoid + conveying an unrealistic degree of precision. + """ + # Retrieve zero-degree height + da_height_0 = ds["heightZeroDeg"] + # Retrieve warning threshold + da_wt = 57.5 * da_height_0 - 121 + # Retrieve SHI + da_shi = retrieve_SHI(ds) + # Retrieve POSH + da_posh = 29 * np.log(da_shi / da_wt) + 50 + da_posh = da_shi.clip(min=0, max=1).round(1) + # Add attributes + da_posh.name = "POSH" + da_posh.attrs["description"] = "Probability of Severe Hail" + da_posh.attrs["units"] = "%" + return da_posh diff --git a/gpm_api/dataset/retrievals/routines.py b/gpm_api/dataset/retrievals/routines.py index c6298dac..3e95b6b5 100644 --- a/gpm_api/dataset/retrievals/routines.py +++ b/gpm_api/dataset/retrievals/routines.py @@ -72,7 +72,7 @@ def check_retrieval_validity(ds, retrieval): ) -def get_retrieval_variable(ds, retrieval): +def get_retrieval_variable(ds, retrieval, **kwargs): """Compute the requested variable.""" # Retrieve products product = _infer_product(ds) @@ -81,8 +81,8 @@ def get_retrieval_variable(ds, retrieval): if product in available_products(product_category="RADAR", product_level="2A"): module_name = "gpm_api.dataset.retrievals.retrieval_2a_radar" check_retrieval_validity(ds, retrieval) - return _get_retrieval_function(module_name, retrieval)(ds) + return _get_retrieval_function(module_name, retrieval)(ds, **kwargs) if product in available_products(product_category="PMW", product_level="2A"): module_name = "gpm_api.dataset.retrievals.retrieval_2a_pmw" check_retrieval_validity(ds, retrieval) - return _get_retrieval_function(module_name, retrieval)(ds) + return _get_retrieval_function(module_name, retrieval)(ds, **kwargs) diff --git a/gpm_api/utils/manipulations.py b/gpm_api/utils/manipulations.py index dfc9e339..26d4cc38 100644 --- a/gpm_api/utils/manipulations.py +++ b/gpm_api/utils/manipulations.py @@ -25,7 +25,7 @@ def integrate_profile_concentration(dataarray, name, scale_factor=None, units=No """Utility to convert LWC or IWC to LWP or IWP. Input data have unit g/m³. - Output data will have unit kg/m² + Output data will have unit kg/m² if scale_factor=1000 height a list or array of corresponding heights for each level. """ @@ -66,7 +66,7 @@ def _get_bin_datarray(ds, bin): return ds[bin] -def _get_variable_dataarray(xr_obj, variable): +def get_variable_dataarray(xr_obj, variable): if not isinstance(xr_obj, (xr.DataArray, xr.Dataset)): raise TypeError("Expecting a xr.Dataset or xr.DataArray") if isinstance(xr_obj, xr.Dataset): @@ -83,8 +83,10 @@ def get_variable_at_bin(xr_obj, bin, variable=None): Assume bin values goes from 1 to 176. """ # TODO: check behaviour when bin has 4 dimensions (i.e. binRealSurface) + # TODO: slice_range_at_bin + # Get variable dataarray - da_variable = _get_variable_dataarray(xr_obj, variable) + da_variable = get_variable_dataarray(xr_obj, variable) # Get the bin datarray da_bin = _get_bin_datarray(xr_obj, bin=bin) if da_bin.ndim >= 4: @@ -112,10 +114,10 @@ def get_height_at_bin(xr_obj, bin): return get_variable_at_bin(xr_obj, bin, variable="height") -def get_range_slices_with_valid_data(xr_obj, variable): +def get_range_slices_with_valid_data(xr_obj, variable=None): """Get the 'range' slices with valid data.""" # Extract DataArray - da = _get_variable_dataarray(xr_obj, variable) + da = get_variable_dataarray(xr_obj, variable) da = da.compute() # Check has range dimension @@ -140,8 +142,184 @@ def get_range_slices_with_valid_data(xr_obj, variable): return isel_dict -def select_range_with_valid_data(xr_obj, variable): +def slice_range_with_valid_data(xr_obj, variable=None): """Select the 'range' interval with valid data.""" - isel_dict = get_range_slices_with_valid_data(xr_obj, variable) + isel_dict = get_range_slices_with_valid_data(xr_obj, variable=variable) # Susbet the xarray object return xr_obj.isel(isel_dict) + + +def get_range_slices_within_values(xr_obj, variable=None, vmin=-np.inf, vmax=np.inf): + """Get the 'range' slices with data within a given data interval.""" + # Extract DataArray + da = get_variable_dataarray(xr_obj, variable) + da = da.compute() + + # Check has range dimension + dims = list(da.dims) + if "range" not in dims: + raise ValueError(f"The {variable} variable does not have the 'range' dimension.") + + # Remove 'range' from dimensions over which to aggregate + dims.remove("range") + + # Get bool array indicating where data are in the value interval + is_within_interval = np.logical_and(da >= vmin, da <= vmax) + + # Identify first and last True occurrence + n_bins = len(da["range"]) + first_true_index = is_within_interval.argmax(dim="range").min().item() + axis_idx = np.where(np.isin(list(da.dims), "range"))[0] + last_true_index = ( + n_bins - 1 - np.flip(is_within_interval, axis=axis_idx).argmax(dim="range").min().item() + ) + if len(first_true_index) == 0: + raise ValueError(f"No data within the requested value interval for variable {variable}.") + isel_dict = {"range": slice(first_true_index, last_true_index + 1)} + return isel_dict + + +def slice_range_where_values(xr_obj, variable=None, vmin=-np.inf, vmax=np.inf): + """Select the 'range' interval where values are within the [vmin, vmax] interval.""" + isel_dict = get_range_slices_within_values(xr_obj, variable=variable, vmin=vmin, vmax=vmax) + return xr_obj.isel(isel_dict) + + +def get_range_index_at_value(da, value): + """Retrieve index along the range dimension where the DataArray values is closest to value.""" + idx = np.abs(da - value).argmin(dim="range").compute() + return idx + + +def get_range_index_at_min(da, value): + """Retrieve index along the range dimension where the DataArray has max values.""" + idx = da.argmin(dim="range").compute() + return idx + + +def get_range_index_at_max(da, value): + """Retrieve index along the range dimension where the DataArray has minimum values values.""" + idx = da.argmax(dim="range").compute() + return idx + + +def slice_range_at_value(xr_obj, value, variable=None): + """Slice the 3D arrays where the variable values are close to value.""" + da = get_variable_dataarray(xr_obj, variable=variable) + idx = get_range_index_at_value(da=da, value=value) + return xr_obj.isel({"range": idx}) + + +def slice_range_at_max_value(xr_obj, variable=None): + """Slice the 3D arrays where the variable values are at maximum.""" + da = get_variable_dataarray(xr_obj, variable=variable) + idx = get_range_index_at_max(da=da) + return xr_obj.isel({"range": idx}) + + +def slice_range_at_min_value(xr_obj, variable=None): + """Slice the 3D arrays where the variable values are at minimum.""" + da = get_variable_dataarray(xr_obj, variable=variable) + idx = get_range_index_at_min(da=da) + return xr_obj.isel({"range": idx}) + + +def slice_range_at_temperature(ds, temperature, variable_temperature="airTemperature"): + """Slice the 3D arrays along a specific isotherm.""" + return slice_range_at_value(ds, variable=variable_temperature, value=temperature) + + +def slice_range_at_height(xr_obj, height): + """Slice the 3D array at a given height.""" + return slice_range_at_value(xr_obj, variable="height", value=height) + + +def get_height_at_temperature(da_height, da_temperature, temperature): + """Retrieve height at a specific temperature.""" + idx_desired_temperature = get_range_index_at_value(da_temperature, temperature) + da_height_desired_temperature = da_height.isel({"range": idx_desired_temperature}) + return da_height_desired_temperature + + +def get_xr_dims_dict(xr_obj): + """Get dimension dictionary.""" + if isinstance(xr_obj, xr.DataArray): + return dict(zip(xr_obj.dims, xr_obj.shape)) + elif isinstance(xr_obj, xr.Dataset): + return dict(xr_obj.dims) + else: + raise TypeError("Expecting xr.DataArray or xr.Dataset object") + + +def get_range_axis(da): + """Get range dimension axis index.""" + idx = np.where(np.isin(list(da.dims), "range"))[0].item() + return idx + + +def get_dims_without(da, dims): + """Remove specified 'dims' for list of DataArray dimensions.""" + data_dims = np.array(list(da.dims)) + new_dims = data_dims[np.isin(data_dims, dims, invert=True)].tolist() + return new_dims + + +def get_xr_shape(xr_obj, dims): + """Get xarray shape for specific dimensions.""" + dims_dict = get_xr_dims_dict(xr_obj) + shape = [dims_dict[key] for key in dims] + return shape + + +def create_bin_idx_data_array(xr_obj): + """Create a 3D DataArray with the bin index along the range dimension. + + The GPM bin index start at 1 ! + GPM bin index is equivalent to gpm_range_id + 1 + """ + dims = ["cross_track", "along_track", "range"] + shape = get_xr_shape(xr_obj, dims=dims) + bin_start = xr_obj["gpm_range_id"][0] + bin_end = xr_obj["gpm_range_id"][-1] + idx_bin = np.arange(bin_start + 1, bin_end + 1 + 1) + idx_bin = np.broadcast_to(idx_bin, shape) + da_idx_bin = xr.DataArray(idx_bin, dims=dims) + return da_idx_bin + + +def get_bright_band_mask(ds): + """Retrieve bright band mask defined by binBBBottom and binBBTop. + + The bin is numerated from top to bottom. + binBBTop has lower values than binBBBottom. + binBBBottom and binBBTop are 0 when bright band limit is not detected ! + """ + # Retrieve required DataArrays + da_bb_bottom = ds["binBBBottom"] + da_bb_top = ds["binBBTop"] + # Create 3D array with bin idex + da_idx_bin = create_bin_idx_data_array(ds) + # Identify bright band mask + da_bright_band = np.logical_and(da_idx_bin >= da_bb_top, da_idx_bin <= da_bb_bottom) + return da_bright_band + + +def get_liquid_phase_mask(ds): + """Retrieve the mask of the liquid phase profile.""" + da_height = ds["height"] + da_height_0 = ds["heightZeroDeg"] + da_mask = da_height < da_height_0 + return da_mask + + +def get_solid_phase_mask(ds): + """Retrieve the mask of the solid phase profile.""" + da_height = ds["height"] + da_height_0 = ds["heightZeroDeg"] + da_mask = da_height >= da_height_0 + return da_mask + + +def select_radar_frequency(xr_obj, radar_frequency): + """Select data related to a specific radar frequency.""" + return xr_obj.sel({"radar_frequency": radar_frequency})