diff --git a/ocf_datapipes/load/satellite.py b/ocf_datapipes/load/satellite.py index 150216132..44ba7f557 100644 --- a/ocf_datapipes/load/satellite.py +++ b/ocf_datapipes/load/satellite.py @@ -83,7 +83,7 @@ def open_sat_data( ds = open_sat_data(zarr_paths) ``` """ - _log.info("Opening satellite data: %s", zarr_path) + _log.info(f"Opening satellite data: %s, {use_15_minute_data_if_needed=}", zarr_path) # Silence the warning about large chunks. # Alternatively, we could set this to True, but that slows down loading a Satellite batch @@ -113,6 +113,8 @@ def open_sat_data( dataset = dataset.load() _log.debug("Resampling 15 minute data to 5 mins") dataset = dataset.resample(time="5T").interpolate("linear") + else: + _log.debug("Not using 15 minute data") # Remove data coordinate dimensions if they exist if "x_geostationary_coordinates" in dataset: diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index fed6ab40b..c8f0578ee 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -228,6 +228,7 @@ def _get_datapipes_dict( use_hrv=False, use_nwp=not block_nwp, # Only loaded if we aren't replacing them with zeros use_topo=False, + production=production, ) if production: configuration: Configuration = datapipes_dict["config"] @@ -636,9 +637,26 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch: if np.any(np.isnan(batch[BatchKey.satellite_actual])): logger.error("Found nans values in satellite data") - for t in range(batch[BatchKey.satellite_actual].shape[1]): - if np.any(np.isnan(batch[BatchKey.satellite_actual][:, t])): - logger.error(f"Found nans values in satellite data at time index {t}") + logger.error(batch[BatchKey.satellite_actual].shape) + + # loop over time and channels + for dim in [0, 1]: + for t in range(batch[BatchKey.satellite_actual].shape[dim]): + if dim == 0: + sate_data_one_step = batch[BatchKey.satellite_actual][t] + else: + sate_data_one_step = batch[BatchKey.satellite_actual][:, t] + nans = np.isnan(sate_data_one_step) + + if np.any(nans): + percent_nans = np.sum(nans) / np.prod(sate_data_one_step.shape) * 100 + + logger.error( + f"Found nans values in satellite data at index {t} ({dim=}). " + f"{percent_nans}% of values are nans" + ) + else: + logger.error(f"Found no nans values in satellite data at index {t} {dim=}") raise ValueError("Found nans values in satellite data")