Skip to content

Commit

Permalink
wip memory debugging
Browse files (browse the repository at this point in the history)
  • Loading branch information
annajungbluth committed May 27, 2024
1 parent e557e7e commit a6f3237
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
18 changes: 10 additions & 8 deletions rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,8 @@ def preprocess_fn(self, ds: xr.Dataset) -> Tuple[xr.Dataset, xr.Dataset]:
# resampling
ds_subset = resample_rioxarray(ds_subset, resolution=(self.resolution, self.resolution), method=self.resample_method)

logger.info('Assigning latitude and longitude coordinates')
# assign coordinates
ds_subset = calc_latlon(ds_subset)
# ds_subset = calc_latlon(ds_subset)
del ds # delete to avoid memory problems
return ds_subset

Expand All @@ -124,7 +123,7 @@ def preprocess_fn_radiances(self, ds: xr.Dataset) -> xr.Dataset:
Returns:
xr.Dataset: The preprocessed dataset.
"""
variables = ["Rad", "DQF"] # "Rad" = radiance, "DQF" = data quality flag
variables = ["Rad"] # "Rad" = radiance, "DQF" = data quality flag

# Extract relevant attributes from original dataset
time_stamp = pd.to_datetime(ds.t.values)
Expand All @@ -136,12 +135,11 @@ def preprocess_fn_radiances(self, ds: xr.Dataset) -> xr.Dataset:
# do core preprocess function (e.g. to correct band coordinates, subset data, resample, etc.)
ds_subset = self.preprocess_fn(ds)

# select relevant variables
ds_subset = ds_subset[variables]
# convert measurement time (in seconds) to datetime
time_stamp = time_stamp.strftime("%Y-%m-%d %H:%M")
# assign bands data to each variable
ds_subset[variables] = ds_subset[variables].expand_dims({"band": band_values})
ds_subset = ds_subset[variables]
ds_subset = ds_subset.expand_dims({"band": band_values})
# attach time coordinate
ds_subset = ds_subset.assign_coords({"time": [time_stamp]})
# drop variables that will no longer be needed
Expand Down Expand Up @@ -198,7 +196,7 @@ def preprocess_radiances(self, files: List[str]) -> xr.Dataset:
logger.info(f"Number of radiance files: {len(files)}")
assert len(files) == 16

for i, ifile in enumerate(files):
for i, ifile in tqdm(enumerate(files)):
with xr.load_dataset(ifile, engine='h5netcdf') as ds_file:
ds_file = self.preprocess_fn_radiances(ds_file)
if i == 0:
Expand All @@ -210,6 +208,10 @@ def preprocess_radiances(self, files: List[str]) -> xr.Dataset:
ds = xr.concat([ds, ds_file], dim="band")
del ds_file # delete to avoid memory problems

# assign coordinates
logger.info("Assigning latitude and longitude coordinates.")
ds = calc_latlon(ds)

# # open multiple files as a single dataset
# ds = [xr.open_mfdataset(ifile, preprocess=self.preprocess_fn_radiances, concat_dim="band", combine="nested") for
# ifile in files]
Expand All @@ -232,7 +234,7 @@ def preprocess_radiances(self, files: List[str]) -> xr.Dataset:
standard_name=attrs_rad["standard_name"],
units=attrs_rad["units"],
)
ds["DQF"].attrs = {}
# ds["DQF"].attrs = {}
return ds

def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset:
Expand Down
4 changes: 1 addition & 3 deletions rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
from datetime import datetime
from pathlib import Path

# TODO: Add unit conversion?

def parse_msg_dates_from_file(file: str):
"""
Parses the date and time information from a MSG file name.
Expand Down Expand Up @@ -334,7 +332,7 @@ def preprocess_files(self):

def geoprocess(
resolution: float = None, # defined in meters
read_path: str = "./",
read_path: str = "/mnt/disks/data/miniset/msg/raw",
save_path: str = "./",
region: str = None,
resample_method: str = "bilinear",
Expand Down
2 changes: 2 additions & 0 deletions rs_tools/_src/geoprocessing/reproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def calc_latlon(ds: xr.Dataset) -> xr.Dataset:
"""
XX, YY = np.meshgrid(ds.x.data, ds.y.data)
lons, lats = convert_x_y_to_lat_lon(ds.rio.crs, XX, YY)
lons = np.float32(lons)
lats = np.float32(lats)
# Check if lons and lons_trans are close in value
# Set inf to NaN values
lons[lons == np.inf] = np.nan
Expand Down

0 comments on commit a6f3237

Please sign in to comment.