Merge pull request #20 from openclimatefix/update #minor
Update #minor
dfulu authored Jun 18, 2024
2 parents 085586c + 68f8495 commit 8592272
Showing 31 changed files with 228 additions and 194 deletions.
6 changes: 3 additions & 3 deletions .bumpversion.cfg
@@ -4,6 +4,6 @@ tag = True
current_version = 0.1.4
message = Bump version: {current_version} → {new_version} [skip ci]

[bumpversion:file:setup.py]
search = version="{current_version}"
replace = version="{new_version}"
[bumpversion:file:pvnet_summation/__init__.py]
search = __version__ = "{current_version}"
replace = __version__ = "{new_version}"
2 changes: 0 additions & 2 deletions .coveragerc

This file was deleted.

4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 88
exclude = .tox,.eggs,ci/templates,build,dist, __init__.py
ignore = E741,F403,E265,W504,E226,W503,E501,E203
7 changes: 5 additions & 2 deletions .github/workflows/release.yaml
@@ -1,13 +1,16 @@
name: Bump version and auto-release
name: Python Bump Version & release

on:
push:
branches:
- main
paths-ignore:
- "configs.example/**" # ignores all files in configs.example
- "**/README.md" # ignores all README files

jobs:
release:
uses: openclimatefix/.github/.github/workflows/python-release.yml@v1.7.2
uses: openclimatefix/.github/.github/workflows/python-release.yml@main
secrets:
token: ${{ secrets.PYPI_API_TOKEN }}
PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
@@ -2,6 +2,8 @@ name: Python package tests

on:
push:
pull_request:
types: [opened, reopened]
schedule:
- cron: "0 12 * * 1"
jobs:
@@ -11,9 +13,10 @@ jobs:
# 0 means don't use pytest-xdist
pytest_numcpus: "4"
# pytest-cov looks at this folder
pytest_cov_dir: "pvnet_summation"
pytest_cov_dir: "pvnet"
# extra things to install
sudo_apt_install: "libgeos++-dev libproj-dev proj-data proj-bin"
# brew_install: "proj geos librttopo"
os_list: '["ubuntu-latest"]'
python-version: "['3.10', '3.11']"
extra_commands: "pip3 install -e '.[all]'"
15 changes: 14 additions & 1 deletion .gitignore
@@ -1,11 +1,24 @@
# Custom
config_tree.txt
configs/
lightning_logs/
logs/
output/
checkpoints*
csv/
notebooks/
*.html
*.csv
latest_logged_train_batch.png

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
.idea/

# Distribution / packaging
.Python
build/
2 changes: 2 additions & 0 deletions .isort.cfg
@@ -0,0 +1,2 @@
[settings]
profile=black
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -12,20 +12,20 @@ repos:
- id: detect-private-key

# python code formatting/linting
- repo: https://github.com/charliermarsh/ruff-pre-commit
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.0.260"
rev: "v0.0.286"
hooks:
- id: ruff
args: [--fix]
- repo: https://github.com/psf/black
rev: 23.3.0
rev: 23.7.0
hooks:
- id: black
args: [--line-length, "100"]
# yaml formatting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.0-alpha.6
rev: v3.0.2
hooks:
- id: prettier
types: [yaml]
1 change: 1 addition & 0 deletions .prettierignore
@@ -0,0 +1 @@
configs.example
86 changes: 83 additions & 3 deletions README.md
@@ -1,10 +1,90 @@
# PVNet summation
This project trains a model to sum the GSP predictions of [PVNet](https://github.com/openclimatefix/PVNet) into a national estimate.

## Setup
Using this model to sum the GSP predictions, rather than taking a simple sum, increases the accuracy of the national predictions, and the model can be configured to produce estimates of the uncertainty range around the national estimate. See the [PVNet](https://github.com/openclimatefix/PVNet) repo and our paper for more details.
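Conceptually, the baseline is a plain sum over the per-GSP forecasts, whereas the summation model learns a mapping from all of the GSP-level predictions to the national forecast. A minimal sketch of the difference, assuming 317 GSPs and a hypothetical 16-step forecast horizon:

```python
import torch

# Hypothetical shapes: 317 GSPs x 16 forecast steps
gsp_preds = torch.rand(317, 16)

# Baseline: a simple sum across GSPs
national_simple = gsp_preds.sum(dim=0)

# Summation model: a learned mapping from the flattened GSP predictions to the
# national forecast (the real network is configured in configs/model/default.yaml)
model = torch.nn.Linear(317 * 16, 16)
national_learned = model(gsp_preds.flatten())
```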


## Setup / Installation

```bash
git clone https://github.com/openclimatefix/PVNet_summation
cd PVNet_summation
pip install -r requirements.txt
pip install git+https://github.com/SheffieldSolar/PV_Live-API
pip install .
```

### Additional development dependencies

```bash
pip install ".[dev]"
```

## Getting started with running PVNet summation

In order to run PVNet summation, we assume that you are already set up with
[PVNet](https://github.com/openclimatefix/PVNet) and have met all the requirements there.

Before running any code, copy the example configuration to a
configs directory:

```bash
cp -r configs.example configs
```

You will be making local amendments to these configs.

### Datasets

The datasets required are the same as documented in
[PVNet](https://github.com/openclimatefix/PVNet). The only addition is that you will also need
PVLive data for the national sum, i.e. GSP ID 0.
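
For example, you can quickly check that your PVLive dataset includes the national series. A minimal sketch, assuming the common OCF zarr layout with a `gsp_id` coordinate (the path here is hypothetical):

```python
import xarray as xr

# Hypothetical path to the PVLive GSP data
ds = xr.open_zarr("path/to/pvlive_gsp.zarr")

# The summation model additionally needs the national outturn, stored as GSP ID 0
national = ds.sel(gsp_id=0)
print(national)
```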


## Generating pre-made concurrent batches of data for PVNet

You must prepare batches in advance using the `save_concurrent_batches.py` script from
PVNet. This saves the batches in the format the PVNet model needs to make predictions for all
GSPs for a single forecast init time. See the PVNet package for more details on this.
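
If you want to sanity-check the result, the saved batches are ordinary PyTorch files. A hedged sketch, assuming one file per forecast init time and hypothetical file names (the exact layout may differ between PVNet versions):

```python
import torch

# Hypothetical path to one saved concurrent batch
batch = torch.load("path/to/batches/train/000000.pt")

# Each batch maps keys to tensors covering all GSPs for a single init time
for key, value in batch.items():
    if isinstance(value, torch.Tensor):
        print(key, tuple(value.shape))
```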


### Set up and config example for batch creation


The concurrent batches created in the step above will be augmented with a few additional pieces of
data required for the summation model. Within your copy of `PVNet_summation/configs`, make sure you
have replaced all of the items marked with `PLACEHOLDER`.

### Training PVNet_summation

How PVNet_summation is run is determined by the extensive configuration in the config files. The
configs stored in `PVNet_summation/configs.example` should work with batches created using the
steps and batch creation config mentioned above.

Make sure to update the following config files before training your model:

1. In `configs/datamodule/default.yaml`:
- update `batch_dir` to point to the directory you stored your concurrent batches in during
batch creation.
- update `gsp_zarr_path` to point to the PVLive data containing the national estimate
2. In `configs/model/default.yaml`:
- update the PVNet model for which you are training a summation model. A new summation model
should be trained for each PVNet model
- update the hyperparameters and structure of the summation model
3. In `configs/trainer/default.yaml`:
- set `accelerator: cpu` if running on a system without a supported GPU
4. In `configs/config.yaml`:
- It is recommended that you set `presave_pvnet_outputs` to `True`. With this setting, the
concurrent batches you create are run through the PVNet model only once, before training,
and their outputs are saved, rather than being recomputed on the fly for every batch
throughout training. This can speed up training significantly (see the sketch below).
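
To illustrate what presaving does, here is a conceptual sketch only, not the package's actual implementation: the frozen PVNet model is run over every concurrent batch once, and only its much smaller outputs are stored for the summation model to train on.

```python
import torch

@torch.no_grad()
def presave_pvnet_outputs(pvnet_model, batches, out_dir):
    """Conceptual sketch of presaving; not the package's actual code."""
    pvnet_model.eval()
    for i, batch in enumerate(batches):
        # Per-GSP predictions for one forecast init time
        y_hat = pvnet_model(batch)
        torch.save(y_hat, f"{out_dir}/{i:06}.pt")
```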


Assuming you have updated the configs, you should now be able to run:

```bash
python run.py
```


## Testing

You can use `python -m pytest tests` to run the tests.
@@ -16,12 +16,12 @@ model_checkpoint:
every_n_epochs: 1
verbose: False
filename: "epoch={epoch}-step={step}"
dirpath: "checkpoints/pvnet_summation/${model_name}" #${..model_name}
dirpath: "PLACEHOLDER/${model_name}"
auto_insert_metric_name: False
save_on_train_epoch_end: False

stochastic_weight_averaging:
_target_: pvnet_summation.callbacks.StochasticWeightAveraging
swa_lrs: 0.0000001
swa_epoch_start: 0.8
annealing_epochs: 5
#stochastic_weight_averaging:
# _target_: pvnet_summation.callbacks.StochasticWeightAveraging
# swa_lrs: 0.0000001
# swa_epoch_start: 0.8
# annealing_epochs: 5
File renamed without changes.
6 changes: 6 additions & 0 deletions configs.example/datamodule/default.yaml
@@ -0,0 +1,6 @@
_target_: pvnet_summation.data.datamodule.DataModule
batch_dir: "PLACEHOLDER"
gsp_zarr_path: "PLACEHOLDER"
batch_size: 32
num_workers: 20
prefetch_factor: 2
File renamed without changes.
@@ -2,9 +2,9 @@

wandb:
_target_: lightning.pytorch.loggers.wandb.WandbLogger
project: "pvnet_summation"
project: "PLACEHOLDER"
name: "${model_name}"
save_dir: "/mnt/disks/batches/"
save_dir: "PLACEHOLDER"
offline: False # set True to store all logs only locally
id: null # pass correct id to resume experiment!
# entity: "" # set to name of your wandb team or just remove it
File renamed without changes.
File renamed without changes.
@@ -1,6 +1,5 @@
_target_: lightning.pytorch.trainer.trainer.Trainer

# set `1` to train on GPU, `0` to train on CPU only
accelerator: gpu
devices: auto

@@ -9,7 +8,6 @@ max_epochs: 100
reload_dataloaders_every_n_epochs: 0
num_sanity_val_steps: 8
fast_dev_run: false
#profiler: 'simple'

#accumulate_grad_batches: 4
#val_check_interval: 800
Empty file removed configs/callbacks/none.yaml
6 changes: 0 additions & 6 deletions configs/datamodule/default.yaml

This file was deleted.

48 changes: 0 additions & 48 deletions configs/trainer/all_params.yaml

This file was deleted.

7 changes: 0 additions & 7 deletions environment.yml

This file was deleted.

1 change: 1 addition & 0 deletions pvnet_summation/__init__.py
@@ -1 +1,2 @@
"""PVNet_summation"""
__version__ = "0.1.4"
7 changes: 7 additions & 0 deletions pvnet_summation/models/base_model.py
@@ -92,6 +92,8 @@ def __init__(
else:
self.pvnet_output_shape = (317, self.pvnet_model.forecast_len)

self.use_weighted_loss = False

def predict_pvnet_batch(self, batch):
"""Use PVNet model to create predictions for batch"""
gsp_batches = []
@@ -185,6 +187,11 @@ def validation_step(self, batch: dict, batch_idx):
losses = self._calculate_common_losses(y, y_hat)
losses.update(self._calculate_val_losses(y, y_hat))

# Store these to make horizon accuracy plot
self._horizon_maes.append(
{i: losses[f"MAE_horizon/step_{i:03}"].cpu().numpy() for i in range(self.forecast_len)}
)

logged_losses = {f"{k}/val": v for k, v in losses.items()}

# Add losses for sum of GSP predictions
2 changes: 1 addition & 1 deletion pvnet_summation/models/model.py
@@ -59,7 +59,7 @@ def __init__(
output_network_kwargs = dict()

self.model = output_network(
in_features=np.product(self.pvnet_output_shape),
in_features=np.prod(self.pvnet_output_shape),
out_features=self.num_output_features,
**output_network_kwargs,
)
2 changes: 2 additions & 0 deletions pvnet_summation/training.py
@@ -149,6 +149,8 @@ def train(config: DictConfig) -> Optional[float]:
for callback in callbacks:
log.info(f"{callback}")
if isinstance(callback, ModelCheckpoint):
# Need to call the .experiment property to initialise the logger
wandb_logger.experiment
callback.dirpath = "/".join(
callback.dirpath.split("/")[:-1] + [wandb_logger.version]
)