Skip to content

Commit

Permalink
Fix bug in remap_numeric_array
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Jun 1, 2024
1 parent 433172c commit fb3f1f3
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 27 deletions.
26 changes: 13 additions & 13 deletions gpm/dataset/decoding/decode_2a_radar.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,18 @@ def decode_qualityTypePrecip(da):

def decode_flagShallowRain(da):
"""Decode the 2A-<RADAR> variable flagShallowRain."""
# NOTE(review): this view appears to interleave the previous body (the
# statements up to the first `flag_meanings` list) with the new body
# (starting at `da.where(da >= 0)`); confirm against the repository
# before relying on the statement order shown here.
da = da.where(da > -1112, 0)
remapping_dict = {-1111: 0, 0: 1, 10: 2, 11: 3, 20: 4, 21: 5}
da.data = remap_numeric_array(da.data, remapping_dict) # TODO
da.attrs["flag_values"] = list(remapping_dict.values())
da.attrs["flag_meanings"] = [
"no rain",
"no shallow rain",
"Shallow isolated (maybe)",
"Shallow isolated (certain)",
"Shallow non-isolated (maybe)",
"Shallow non-isolated (certain)",
]
da = da.where(da >= 0) # negative values (e.g. the -1111 "no rain" code) are set to np.nan
# Original flag codes (keys) are remapped to consecutive integers (values)
remapping_dict = {0: 0, 10: 1, 11: 2, 20: 3, 21: 4}
da.data = remap_numeric_array(da.data, remapping_dict)
# Remapped flag value -> human readable meaning
value_dict = {
0: "No shallow rain",
1: "Shallow isolated (maybe)",
2: "Shallow isolated (certain)",
3: "Shallow non-isolated (maybe)",
4: "Shallow non-isolated (certain)",
}
# Iterating a dict yields its keys, i.e. the remapped flag values
da.attrs["flag_values"] = list(value_dict)
da.attrs["flag_meanings"] = list(value_dict.values())
da.attrs["description"] = "Type of shallow rain"
return da

Expand Down Expand Up @@ -223,7 +223,7 @@ def decode_product(ds):
# Define variables to decode with _decode_<variable> functions
variables = [
"flagBB",
# "flagShallowRain",
"flagShallowRain",
"flagAnvil",
"flagHeavyIcePrecip",
"flagHail",
Expand Down
43 changes: 29 additions & 14 deletions gpm/dataset/decoding/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,36 @@ def add_decoded_flag(ds, variables):
return ds


def remap_numeric_array(arr, remapping_dict):
# def _np_remap_numeric_array1(arr, remapping_dict, fill_value=np.nan):
# # VERY SLOW ALTERNATIVE
# isna = np.isnan(arr)
# arr[isna] = -1 # dummy
# unique_values = np.unique(arr[~np.isnan(arr)])
# _ = [remapping_dict.setdefault(value, fill_value) for value in unique_values if value not in remapping_dict]
# remapping_dict = {float(k): float(v) for k, v in remapping_dict.items()}
# new_arr = np.vectorize(remapping_dict.__getitem__)(arr)
# new_arr[isna] = np.nan
# return new_arr


def _np_remap_numeric_array(arr, remapping_dict, fill_value=np.nan):
# Define conditions
conditions = [arr == i for i in remapping_dict]
# Define choices corresponding to conditions
choices = remapping_dict.values()
# Apply np.select to transform the array
return np.select(conditions, choices, default=fill_value)


def _dask_remap_numeric_array(arr, remapping_dict, fill_value=np.nan):
    """Remap the values of a Dask array block by block.

    Parameters
    ----------
    arr : dask.array.Array
        Chunked array whose values must be remapped.
    remapping_dict : dict
        Mapping from original values (keys) to replacement values (values).
    fill_value : scalar, optional
        Value assigned to elements matching none of the keys.
        The default is ``np.nan``.

    Returns
    -------
    dask.array.Array
        Lazy array obtained by applying ``_np_remap_numeric_array`` to each
        block of ``arr``.
    """
    # The output dtype is set by the replacement values and fill_value, not
    # by the input (an integer input with a NaN fill_value yields floats).
    # Probe the NumPy implementation on a zero-length array so the dtype
    # declared to dask matches what each block will actually return.
    probe = np.empty((0,), dtype=arr.dtype)
    out_dtype = _np_remap_numeric_array(probe, remapping_dict, fill_value).dtype
    return dask.array.map_blocks(
        _np_remap_numeric_array,
        arr,
        remapping_dict,
        fill_value,
        dtype=out_dtype,
    )


def remap_numeric_array(arr, remapping_dict, fill_value=np.nan):
"""Remap the values of a numeric array.

``remapping_dict`` maps original values (keys) to their replacement
values; ``fill_value`` is forwarded unchanged to the helper that does
the remapping. Arrays exposing a ``chunks`` attribute are handed to
``_dask_remap_numeric_array``, anything else to ``_np_remap_numeric_array``.
"""
# NOTE(review): the TODOs and the np.searchsorted return below look like the
# previous implementation retained by this diff view; if so, they are
# superseded by the chunk-aware dispatch at the end of the function.
# Confirm against the repository.
# TODO: this is erroneous
# TODO: implement that works with dask array also !
# TODO: implement it works if values not in remapping dict
# TODO: implement it works if only np.nan values
# remapping_dict = {-1111: 0, 0: 1, 10: 2, 11: 3, 20: 4, 21: 5}
original_values = list(remapping_dict.keys())

# Use np.searchsorted to remap the array
# TODO: works only if not np.nan and remap to 0-n ?
return np.searchsorted(original_values, arr, sorter=np.argsort(original_values))

# Correct Alternative (but less performant) :
# np.vectorize(remapping_dict.__getitem__)(arr)
# The `chunks` attribute is used as the marker for a chunked (dask) array
if hasattr(arr, "chunks"):
return _dask_remap_numeric_array(arr, remapping_dict, fill_value=fill_value)
return _np_remap_numeric_array(arr, remapping_dict, fill_value=fill_value)


def ceil_dataarray(da):
Expand Down

0 comments on commit fb3f1f3

Please sign in to comment.