From c7e688e21bf33aa6057c0677354ad7a44e08df64 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 11 Jul 2023 16:21:18 +0100 Subject: [PATCH 1/4] add more verbose logs, to see where the nans are in the satellite data --- ocf_datapipes/training/pvnet.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index fed6ab40b..b17dea5bc 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -636,9 +636,25 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch: if np.any(np.isnan(batch[BatchKey.satellite_actual])): logger.error("Found nans values in satellite data") - for t in range(batch[BatchKey.satellite_actual].shape[1]): - if np.any(np.isnan(batch[BatchKey.satellite_actual][:, t])): - logger.error(f"Found nans values in satellite data at time index {t}") + logger.error(batch[BatchKey.satellite_actual].shape) + + # loop over time and channels + for dim in [0,1]: + for t in range(batch[BatchKey.satellite_actual].shape[dim]): + + if dim == 0: + sate_data_one_step = batch[BatchKey.satellite_actual][t] + else: + sate_data_one_step = batch[BatchKey.satellite_actual][:, t] + nans = np.isnan(sate_data_one_step) + + if np.any(nans): + + percent_nans = np.sum(nans) / np.prod(sate_data_one_step.shape) * 100 + + logger.error(f"Found nans values in satellite data at index {t} ({dim=}). 
{percent_nans}% of values are nans") + else: + logger.error(f"Found no nans values in satellite data at index {t} {dim=}") raise ValueError("Found nans values in satellite data") From cd96957cc804953fe2c123c3fa369acbe789fbb7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jul 2023 15:54:28 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ocf_datapipes/training/pvnet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index b17dea5bc..9ef491dfc 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -639,9 +639,8 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch: logger.error(batch[BatchKey.satellite_actual].shape) # loop over time and channels - for dim in [0,1]: + for dim in [0, 1]: for t in range(batch[BatchKey.satellite_actual].shape[dim]): - if dim == 0: sate_data_one_step = batch[BatchKey.satellite_actual][t] else: @@ -649,10 +648,11 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch: nans = np.isnan(sate_data_one_step) if np.any(nans): - percent_nans = np.sum(nans) / np.prod(sate_data_one_step.shape) * 100 - logger.error(f"Found nans values in satellite data at index {t} ({dim=}). {percent_nans}% of values are nans") + logger.error( + f"Found nans values in satellite data at index {t} ({dim=}). 
{percent_nans}% of values are nans" + ) else: logger.error(f"Found no nans values in satellite data at index {t} {dim=}") From 73ffc94ef28f50e16c25e3642ea7458d40cc01ea Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 11 Jul 2023 17:06:42 +0100 Subject: [PATCH 3/4] lint --- ocf_datapipes/training/pvnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index 9ef491dfc..177ee8396 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -651,7 +651,8 @@ def check_nans_in_satellite_data(batch: NumpyBatch) -> NumpyBatch: percent_nans = np.sum(nans) / np.prod(sate_data_one_step.shape) * 100 logger.error( - f"Found nans values in satellite data at index {t} ({dim=}). {percent_nans}% of values are nans" + f"Found nans values in satellite data at index {t} ({dim=}). " + f"{percent_nans}% of values are nans" ) else: logger.error(f"Found no nans values in satellite data at index {t} {dim=}") From 1be38aafbc10106c585b8b2979af54535f841f97 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 11 Jul 2023 19:31:14 +0100 Subject: [PATCH 4/4] fix: add production book --- ocf_datapipes/load/satellite.py | 4 +++- ocf_datapipes/training/pvnet.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ocf_datapipes/load/satellite.py b/ocf_datapipes/load/satellite.py index 150216132..44ba7f557 100644 --- a/ocf_datapipes/load/satellite.py +++ b/ocf_datapipes/load/satellite.py @@ -83,7 +83,7 @@ def open_sat_data( ds = open_sat_data(zarr_paths) ``` """ - _log.info("Opening satellite data: %s", zarr_path) + _log.info(f"Opening satellite data: %s, {use_15_minute_data_if_needed=}", zarr_path) # Silence the warning about large chunks. 
# Alternatively, we could set this to True, but that slows down loading a Satellite batch @@ -113,6 +113,8 @@ def open_sat_data( dataset = dataset.load() _log.debug("Resampling 15 minute data to 5 mins") dataset = dataset.resample(time="5T").interpolate("linear") + else: + _log.debug("Not using 15 minute data") # Remove data coordinate dimensions if they exist if "x_geostationary_coordinates" in dataset: diff --git a/ocf_datapipes/training/pvnet.py b/ocf_datapipes/training/pvnet.py index 177ee8396..c8f0578ee 100644 --- a/ocf_datapipes/training/pvnet.py +++ b/ocf_datapipes/training/pvnet.py @@ -228,6 +228,7 @@ def _get_datapipes_dict( use_hrv=False, use_nwp=not block_nwp, # Only loaded if we aren't replacing them with zeros use_topo=False, + production=production, ) if production: configuration: Configuration = datapipes_dict["config"]