Skip to content

Commit

Permalink
Merge pull request #215 from openclimatefix/issue/add-logging
Browse files Browse the repository at this point in the history
add more verbose logs, to see where the NaNs are in the satellite data
  • Loading branch information
peterdudfield authored Jul 12, 2023
2 parents 7728e5a + 1be38aa commit 76c1125
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
4 changes: 3 additions & 1 deletion ocf_datapipes/load/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def open_sat_data(
ds = open_sat_data(zarr_paths)
```
"""
_log.info("Opening satellite data: %s", zarr_path)
_log.info(f"Opening satellite data: %s, {use_15_minute_data_if_needed=}", zarr_path)

# Silence the warning about large chunks.
# Alternatively, we could set this to True, but that slows down loading a Satellite batch
Expand Down Expand Up @@ -113,6 +113,8 @@ def open_sat_data(
dataset = dataset.load()
_log.debug("Resampling 15 minute data to 5 mins")
dataset = dataset.resample(time="5T").interpolate("linear")
else:
_log.debug("Not using 15 minute data")

# Remove data coordinate dimensions if they exist
if "x_geostationary_coordinates" in dataset:
Expand Down
24 changes: 21 additions & 3 deletions ocf_datapipes/training/pvnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def _get_datapipes_dict(
use_hrv=False,
use_nwp=not block_nwp, # Only loaded if we aren't replacing them with zeros
use_topo=False,
production=production,
)
if production:
configuration: Configuration = datapipes_dict["config"]
Expand Down Expand Up @@ -636,9 +637,26 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch:
if np.any(np.isnan(batch[BatchKey.satellite_actual])):
logger.error("Found nans values in satellite data")

for t in range(batch[BatchKey.satellite_actual].shape[1]):
if np.any(np.isnan(batch[BatchKey.satellite_actual][:, t])):
logger.error(f"Found nans values in satellite data at time index {t}")
logger.error(batch[BatchKey.satellite_actual].shape)

# loop over time and channels
for dim in [0, 1]:
for t in range(batch[BatchKey.satellite_actual].shape[dim]):
if dim == 0:
sate_data_one_step = batch[BatchKey.satellite_actual][t]
else:
sate_data_one_step = batch[BatchKey.satellite_actual][:, t]
nans = np.isnan(sate_data_one_step)

if np.any(nans):
percent_nans = np.sum(nans) / np.prod(sate_data_one_step.shape) * 100

logger.error(
f"Found nans values in satellite data at index {t} ({dim=}). "
f"{percent_nans}% of values are nans"
)
else:
logger.error(f"Found no nans values in satellite data at index {t} {dim=}")

raise ValueError("Found nans values in satellite data")

Expand Down

0 comments on commit 76c1125

Please sign in to comment.