From 944787f49734ca7d87d93513b0582f7521377029 Mon Sep 17 00:00:00 2001
From: James Fulton <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 09:26:41 +0000
Subject: [PATCH 1/7] minimal updates

---
 pvnet_summation/models/base_model.py | 7 +++++++
 pvnet_summation/training.py          | 2 ++
 requirements.txt                     | 4 ++--
 scripts/checkpoint_to_huggingface.py | 9 +++++----
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/pvnet_summation/models/base_model.py b/pvnet_summation/models/base_model.py
index 50e23a1..caec670 100644
--- a/pvnet_summation/models/base_model.py
+++ b/pvnet_summation/models/base_model.py
@@ -91,6 +91,8 @@ def __init__(
             )
         else:
             self.pvnet_output_shape = (317, self.pvnet_model.forecast_len)
+        
+        self.use_weighted_loss = False
 
     def predict_pvnet_batch(self, batch):
         """Use PVNet model to create predictions for batch"""
@@ -184,6 +186,11 @@ def validation_step(self, batch: dict, batch_idx):
 
         losses = self._calculate_common_losses(y, y_hat)
         losses.update(self._calculate_val_losses(y, y_hat))
+        
+        # Store these to make horizon accuracy plot
+        self._horizon_maes.append(
+            {i: losses[f"MAE_horizon/step_{i:03}"].cpu().numpy() for i in range(self.forecast_len)}
+        )
 
         logged_losses = {f"{k}/val": v for k, v in losses.items()}
 
diff --git a/pvnet_summation/training.py b/pvnet_summation/training.py
index 73bcd2a..5c723fa 100644
--- a/pvnet_summation/training.py
+++ b/pvnet_summation/training.py
@@ -149,6 +149,8 @@ def train(config: DictConfig) -> Optional[float]:
         for callback in callbacks:
             log.info(f"{callback}")
             if isinstance(callback, ModelCheckpoint):
+                # Need to call the .experiment property to initialise the logger
+                wandb_logger.experiment
                 callback.dirpath = "/".join(
                     callback.dirpath.split("/")[:-1] + [wandb_logger.version]
                 )
diff --git a/requirements.txt b/requirements.txt
index c31f661..6436837 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-ocf_datapipes>=3.3.19
-pvnet>=3.0.25
+ocf_datapipes>=3.3.33
+pvnet>=3.0.45
 numpy
 pandas
 matplotlib
diff --git a/scripts/checkpoint_to_huggingface.py b/scripts/checkpoint_to_huggingface.py
index 0a5c09c..9004762 100644
--- a/scripts/checkpoint_to_huggingface.py
+++ b/scripts/checkpoint_to_huggingface.py
@@ -1,5 +1,5 @@
 """Command line tool to push locally save model checkpoints to huggingface
-
+    
 use:
 python checkpoint_to_huggingface.py "path/to/model/checkpoints" \
     --local-path="~/tmp/this_model" \
@@ -56,9 +56,9 @@ def push_to_huggingface(
         # Only one epoch (best) saved per model
         files = glob.glob(f"{checkpoint_dir_path}/epoch*.ckpt")
         assert len(files) == 1
-        checkpoint = torch.load(files[0])
+        checkpoint = torch.load(files[0], map_location="cpu")
     else:
-        checkpoint = torch.load(f"{checkpoint_dir_path}/last.ckpt")
+        checkpoint = torch.load(f"{checkpoint_dir_path}/last.ckpt", map_location="cpu")
 
     model.load_state_dict(state_dict=checkpoint["state_dict"])
 
@@ -72,7 +72,8 @@ def push_to_huggingface(
     model.save_pretrained(
         model_output_dir,
         config=model_config,
-        wandb_model_code=wandb_id,
+        data_config=None,
+        wandb_ids=wandb_id,
         push_to_hub=push_to_hub,
         repo_id="openclimatefix/pvnet_v2_summation" if push_to_hub else None,
         card_template_path=(

From 2d1e993aed71a37135e6a30f3b8b49fc5a586ee4 Mon Sep 17 00:00:00 2001
From: James <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 13:10:46 +0100
Subject: [PATCH 2/7] change packaging files

---
 .bumpversion.cfg                |  6 +--
 .coveragerc                     |  2 -
 .flake8                         |  4 ++
 .gitignore                      | 15 ++++++-
 .isort.cfg                      |  2 +
 .pre-commit-config.yaml         |  8 ++--
 .prettierignore                 |  1 +
 configs/callbacks/default.yaml  | 27 -----------
 configs/callbacks/none.yaml     |  0
 configs/config.yaml             | 44 ------------------
 configs/datamodule/default.yaml |  6 ---
 configs/hydra/default.yaml      | 12 -----
 configs/logger/wandb.yaml       | 15 -------
 configs/model/default.yaml      | 31 -------------
 configs/readme.md               |  7 ---
 configs/trainer/all_params.yaml | 48 --------------------
 configs/trainer/default.yaml    | 17 -------
 environment.yml                 |  7 ---
 pvnet_summation/__init__.py     |  1 +
 pyproject.toml                  | 79 +++++++++++++++++++++++++++++++++
 requirements.txt                | 22 ---------
 ruff.toml                       | 57 ------------------------
 setup.py                        | 24 ----------
 23 files changed, 108 insertions(+), 327 deletions(-)
 delete mode 100644 .coveragerc
 create mode 100644 .flake8
 create mode 100644 .isort.cfg
 create mode 100644 .prettierignore
 delete mode 100644 configs/callbacks/default.yaml
 delete mode 100644 configs/callbacks/none.yaml
 delete mode 100644 configs/config.yaml
 delete mode 100644 configs/datamodule/default.yaml
 delete mode 100644 configs/hydra/default.yaml
 delete mode 100644 configs/logger/wandb.yaml
 delete mode 100644 configs/model/default.yaml
 delete mode 100644 configs/readme.md
 delete mode 100644 configs/trainer/all_params.yaml
 delete mode 100644 configs/trainer/default.yaml
 delete mode 100644 environment.yml
 create mode 100644 pyproject.toml
 delete mode 100644 requirements.txt
 delete mode 100644 ruff.toml
 delete mode 100644 setup.py

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 7020743..0b845ee 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -4,6 +4,6 @@ tag = True
 current_version = 0.1.4
 message = Bump version: {current_version} → {new_version} [skip ci]
 
-[bumpversion:file:setup.py]
-search = version="{current_version}"
-replace = version="{new_version}"
+[bumpversion:file:pvnet_summation/__init__.py]
+search = __version__ = "{current_version}"
+replace = __version__ = "{new_version}"
diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index c712d25..0000000
--- a/.coveragerc
+++ /dev/null
@@ -1,2 +0,0 @@
-[run]
-omit = tests/*
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..79166d2
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 88
+exclude = .tox,.eggs,ci/templates,build,dist, __init__.py
+ignore = E741,F403,E265,W504,E226,W503,E501,E203
diff --git a/.gitignore b/.gitignore
index fa4c1b7..7d1f119 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,16 @@
+# Custom
+config_tree.txt
+configs/
+lightning_logs/
+logs/
+output/
+checkpoints*
+csv/
+notebooks/
+*.html
+*.csv
+latest_logged_train_batch.png
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -5,7 +18,7 @@ __pycache__/
 
 # C extensions
 *.so
-.idea/
+
 # Distribution / packaging
 .Python
 build/
diff --git a/.isort.cfg b/.isort.cfg
new file mode 100644
index 0000000..b9fb3f3
--- /dev/null
+++ b/.isort.cfg
@@ -0,0 +1,2 @@
+[settings]
+profile=black
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e92053a..e191b29 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -12,20 +12,20 @@ repos:
       - id: detect-private-key
 
   # python code formatting/linting
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
+  - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: "v0.0.260"
+    rev: "v0.0.286"
     hooks:
       - id: ruff
         args: [--fix]
   - repo: https://github.com/psf/black
-    rev: 23.3.0
+    rev: 23.7.0
     hooks:
       - id: black
         args: [--line-length, "100"]
   # yaml formatting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.0-alpha.6
+    rev: v3.0.2
     hooks:
       - id: prettier
         types: [yaml]
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 0000000..b980d35
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1 @@
+configs.example
diff --git a/configs/callbacks/default.yaml b/configs/callbacks/default.yaml
deleted file mode 100644
index 3f147ec..0000000
--- a/configs/callbacks/default.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-learning_rate_monitor:
-  _target_: lightning.pytorch.callbacks.LearningRateMonitor
-  logging_interval: "epoch"
-
-model_summary:
-  _target_: lightning.pytorch.callbacks.ModelSummary
-  max_depth: 3
-
-model_checkpoint:
-  _target_: lightning.pytorch.callbacks.ModelCheckpoint
-  # name of the logged metric which determines when model is improving
-  monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
-  mode: "min" # can be "max" or "min"
-  save_top_k: 1 # save k best models (determined by above metric)
-  save_last: True # additionaly always save model from last epoch
-  every_n_epochs: 1
-  verbose: False
-  filename: "epoch={epoch}-step={step}"
-  dirpath: "checkpoints/pvnet_summation/${model_name}" #${..model_name}
-  auto_insert_metric_name: False
-  save_on_train_epoch_end: False
-
-stochastic_weight_averaging:
-  _target_: pvnet_summation.callbacks.StochasticWeightAveraging
-  swa_lrs: 0.0000001
-  swa_epoch_start: 0.8
-  annealing_epochs: 5
diff --git a/configs/callbacks/none.yaml b/configs/callbacks/none.yaml
deleted file mode 100644
index e69de29..0000000
diff --git a/configs/config.yaml b/configs/config.yaml
deleted file mode 100644
index 0cb36a1..0000000
--- a/configs/config.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-# @package _global_
-
-# specify here default training configuration
-defaults:
-  - trainer: default.yaml
-  - model: default.yaml
-  - datamodule: default.yaml
-  - callbacks: default.yaml # set this to null if you don't want to use callbacks
-  - logger: wandb.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
-  - hydra: default.yaml
-
-# Whether to loop through the PVNet outputs and save them out before training
-presave_pvnet_outputs:
-  True
-
-  # enable color logging
-#  - override hydra/hydra_logging: colorlog
-#  - override hydra/job_logging: colorlog
-
-# path to original working directory
-# hydra hijacks working directory by changing it to the current log directory,
-# so it's useful to have this path as a special variable
-# learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
-work_dir: ${hydra:runtime.cwd}
-
-model_name: "default"
-
-# use `python run.py debug=true` for easy debugging!
-# this will run 1 train, val and test loop with only 1 batch
-# equivalent to running `python run.py trainer.fast_dev_run=true`
-# (this is placed here just for easier access from command line)
-debug: False
-
-# pretty print config at the start of the run using Rich library
-print_config: True
-
-# disable python warnings if they annoy you
-ignore_warnings: True
-
-# check performance on test set, using the best model achieved during training
-# lightning chooses best model based on metric specified in checkpoint callback
-test_after_training: False
-
-seed: 2727831
diff --git a/configs/datamodule/default.yaml b/configs/datamodule/default.yaml
deleted file mode 100644
index 01d5573..0000000
--- a/configs/datamodule/default.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-_target_: pvnet_summation.data.datamodule.DataModule
-batch_dir: "/mnt/disks/bigbatches/concurrent_batches_v3.6.1"
-gsp_zarr_path: "/mnt/disks/nwp/pv_gsp.zarr"
-batch_size: 32
-num_workers: 20
-prefetch_factor: 2
diff --git a/configs/hydra/default.yaml b/configs/hydra/default.yaml
deleted file mode 100644
index a086d12..0000000
--- a/configs/hydra/default.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-# output paths for hydra logs
-run:
-  dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
-sweep:
-  dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
-  subdir: ${hydra.job.num}
-
-# you can set here environment variables that are universal for all users
-# for system specific variables (like data paths) it's better to use .env file!
-job:
-  env_set:
-    EXAMPLE_VAR: "example_value"
diff --git a/configs/logger/wandb.yaml b/configs/logger/wandb.yaml
deleted file mode 100644
index 7af259e..0000000
--- a/configs/logger/wandb.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# https://wandb.ai
-
-wandb:
-  _target_: lightning.pytorch.loggers.wandb.WandbLogger
-  project: "pvnet_summation"
-  name: "${model_name}"
-  save_dir: "/mnt/disks/batches/"
-  offline: False # set True to store all logs only locally
-  id: null # pass correct id to resume experiment!
-  # entity: ""  # set to name of your wandb team or just remove it
-  log_model: False
-  prefix: ""
-  job_type: "train"
-  group: ""
-  tags: []
diff --git a/configs/model/default.yaml b/configs/model/default.yaml
deleted file mode 100644
index 0c05838..0000000
--- a/configs/model/default.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-_target_: pvnet_summation.models.model.Model
-
-output_quantiles: null
-
-model_name: "openclimatefix/pvnet_v2"
-model_version: "898630f3f8cd4e8506525d813dd61c6d8de86144"
-
-#--------------------------------------------
-# Tabular network settings
-#--------------------------------------------
-
-output_network:
-  _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
-  _partial_: True
-output_network_kwargs:
-  fc_hidden_features: 128
-  n_res_blocks: 2
-  res_block_layers: 2
-  dropout_frac: 0.0
-predict_difference_from_sum: False
-
-# ----------------------------------------------
-
-optimizer:
-  _target_: pvnet.optimizers.AdamWReduceLROnPlateau
-  lr: 0.0001
-  weight_decay: 0.25
-  amsgrad: True
-  patience: 20
-  factor: 0.1
-  threshold: 0.00
diff --git a/configs/readme.md b/configs/readme.md
deleted file mode 100644
index 13cbbf2..0000000
--- a/configs/readme.md
+++ /dev/null
@@ -1,7 +0,0 @@
-The following folders how the configuration files
-
-This idea is copied from
-https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml
-
-run experiments by:
-`python run.py experiment=example_simple `
diff --git a/configs/trainer/all_params.yaml b/configs/trainer/all_params.yaml
deleted file mode 100644
index 64f5fdf..0000000
--- a/configs/trainer/all_params.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-_target_: pytorch_lightning.Trainer
-
-# default values for all trainer parameters
-checkpoint_callback: True
-default_root_dir: null
-gradient_clip_val: 0.0
-process_position: 0
-num_nodes: 1
-num_processes: 1
-gpus: null
-auto_select_gpus: False
-tpu_cores: null
-log_gpu_memory: null
-overfit_batches: 0.0
-track_grad_norm: -1
-check_val_every_n_epoch: 1
-fast_dev_run: False
-accumulate_grad_batches: 1
-max_epochs: 1
-min_epochs: 1
-max_steps: null
-min_steps: null
-limit_train_batches: 1.0
-limit_val_batches: 1.0
-limit_test_batches: 1.0
-val_check_interval: 1.0
-flush_logs_every_n_steps: 100
-log_every_n_steps: 50
-accelerator: null
-sync_batchnorm: False
-precision: 32
-weights_save_path: null
-num_sanity_val_steps: 2
-truncated_bptt_steps: null
-resume_from_checkpoint: null
-profiler: null
-benchmark: False
-deterministic: False
-reload_dataloaders_every_epoch: False
-auto_lr_find: False
-replace_sampler_ddp: True
-terminate_on_nan: False
-auto_scale_batch_size: False
-prepare_data_per_node: True
-plugins: null
-amp_backend: "native"
-amp_level: "O2"
-move_metrics_to_cpu: False
diff --git a/configs/trainer/default.yaml b/configs/trainer/default.yaml
deleted file mode 100644
index 3c75e63..0000000
--- a/configs/trainer/default.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-_target_: lightning.pytorch.trainer.trainer.Trainer
-
-# set `1` to train on GPU, `0` to train on CPU only
-accelerator: gpu
-devices: auto
-
-min_epochs: null
-max_epochs: 100
-reload_dataloaders_every_n_epochs: 0
-num_sanity_val_steps: 8
-fast_dev_run: false
-#profiler: 'simple'
-
-#accumulate_grad_batches: 4
-#val_check_interval: 800
-#limit_val_batches: 800
-log_every_n_steps: 50
diff --git a/environment.yml b/environment.yml
deleted file mode 100644
index c7b29cb..0000000
--- a/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: pvnet_summation
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - pip
diff --git a/pvnet_summation/__init__.py b/pvnet_summation/__init__.py
index ed53582..ff51c01 100644
--- a/pvnet_summation/__init__.py
+++ b/pvnet_summation/__init__.py
@@ -1 +1,2 @@
 """PVNet_summation"""
+__version__ = "0.1.4"
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..3112e9b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,79 @@
+[project]
+name="PVNet_summation"
+description = "PVNet_summation"
+authors = [{name="James Fulton", email="info@openclimatefix.org"}]
+dynamic = ["version", "readme"]
+license={file="LICENCE"}
+
+dependencies = [
+    "ocf_datapipes>=3.3.33",
+    "pvnet>=3.0.45",
+    "numpy",
+    "pandas",
+    "matplotlib",
+    "xarray",
+    "ipykernel",
+    "h5netcdf",
+    "torch>=2.0.0",
+    "lightning>=2.0.1",
+    "pytest",
+    "pytest-cov",
+    "typer",
+    "sqlalchemy",
+    "fsspec[s3]",
+    "wandb",
+    "tensorboard",
+    "tqdm",
+    "omegaconf",
+    "hydra-core",
+    "python-dotenv",
+    "huggingface-hub==0.20.*",
+]
+
+[tool.setuptools.dynamic]
+version = {attr = "pvnet_summation.__version__"}
+readme = {file = "README.md", content-type = "text/markdown"}
+
+[tool.setuptools.package-dir]
+"pvnet_summation" = "pvnet_summation"
+
+[project.optional-dependencies]
+dev=[
+    "black",
+    "flake8",
+    "isort",
+    "mypy",
+    "pre-commit",
+    "pytest",
+    "pytest-cov",
+]
+all=["PVNet_summation[dev]"]
+
+[tool.mypy]
+exclude = [
+    "^tests/",
+]
+disallow_untyped_defs = true
+disallow_any_unimported = true
+no_implicit_optional = true
+check_untyped_defs = true
+warn_return_any = true
+warn_unused_ignores = true
+show_error_codes = true
+warn_unreachable = true
+
+[[tool.mypy.overrides]]
+module = [
+]
+ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests",
+]
+
+[tool.ruff]
+line-length = 100
+exclude = [".ipynb_checkpoints", "tests", "configs.example"]
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 6436837..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-ocf_datapipes>=3.3.33
-pvnet>=3.0.45
-numpy
-pandas
-matplotlib
-xarray
-ipykernel
-h5netcdf
-torch>=2.0.0
-lightning>=2.0.1
-pytest
-pytest-cov
-typer
-sqlalchemy
-fsspec[s3]
-wandb
-tensorboard
-tqdm
-omegaconf
-hydra-core
-python-dotenv
-huggingface-hub==0.20.*
diff --git a/ruff.toml b/ruff.toml
deleted file mode 100644
index 5df253f..0000000
--- a/ruff.toml
+++ /dev/null
@@ -1,57 +0,0 @@
-# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
-select = ["B", "E", "F", "D", "I"]
-ignore = ["D200","D202","D210","D212","D415","D105",]
-
-# Allow autofix for all enabled rules (when `--fix`) is provided.
-fixable = ["A", "B", "C", "D", "E", "F", "I"]
-unfixable = []
-
-# Exclude a variety of commonly ignored directories.
-exclude = [
-    ".bzr",
-    ".direnv",
-    ".eggs",
-    ".git",
-    ".hg",
-    ".mypy_cache",
-    ".nox",
-    ".pants.d",
-    ".pytype",
-    ".ruff_cache",
-    ".svn",
-    ".tox",
-    ".venv",
-    "__pypackages__",
-    "_build",
-    "buck-out",
-    "build",
-    "dist",
-    "node_modules",
-    "venv",
-    "tests",
-]
-
-# Same as Black.
-line-length = 100
-
-# Allow unused variables when underscore-prefixed.
-dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-
-# Assume Python 3.10.
-target-version = "py310"
-fix = false
-
-# Group violations by containing file.
-format = "github"
-ignore-init-module-imports = true
-
-[mccabe]
-# Unlike Flake8, default to a complexity level of 10.
-max-complexity = 10
-
-[pydocstyle]
-# Use Google-style docstrings.
-convention = "google"
-
-[per-file-ignores]
-"__init__.py" = ["F401", "E402"]
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 99bbbf7..0000000
--- a/setup.py
+++ /dev/null
@@ -1,24 +0,0 @@
-""" Usual setup file for package """
-# read the contents of your README file
-from pathlib import Path
-
-from setuptools import find_packages, setup
-
-this_directory = Path(__file__).parent
-long_description = (this_directory / "README.md").read_text()
-install_requires = (this_directory / "requirements.txt").read_text().splitlines()
-
-setup(
-    name="PVNet_summation",
-    version="0.1.4",
-    license="MIT",
-    description="Package for training summation model for PVNet",
-    author="James Fulton",
-    author_email="info@openclimatefix.org",
-    company="Open Climate Fix Ltd",
-    install_requires=install_requires,
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    include_package_data=True,
-    packages=find_packages(),
-)

From 46565979b09bac9d35f93485a877041db21fa944 Mon Sep 17 00:00:00 2001
From: James <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 13:47:20 +0100
Subject: [PATCH 3/7] update configs and readme

---
 README.md                               | 86 ++++++++++++++++++++++++-
 configs.example/callbacks/default.yaml  | 27 ++++++++
 configs.example/config.yaml             | 44 +++++++++++++
 configs.example/datamodule/default.yaml |  6 ++
 configs.example/hydra/default.yaml      | 12 ++++
 configs.example/logger/wandb.yaml       | 15 +++++
 configs.example/model/default.yaml      | 31 +++++++++
 configs.example/readme.md               |  7 ++
 configs.example/trainer/default.yaml    | 15 +++++
 9 files changed, 240 insertions(+), 3 deletions(-)
 create mode 100644 configs.example/callbacks/default.yaml
 create mode 100644 configs.example/config.yaml
 create mode 100644 configs.example/datamodule/default.yaml
 create mode 100644 configs.example/hydra/default.yaml
 create mode 100644 configs.example/logger/wandb.yaml
 create mode 100644 configs.example/model/default.yaml
 create mode 100644 configs.example/readme.md
 create mode 100644 configs.example/trainer/default.yaml

diff --git a/README.md b/README.md
index 249f601..767bf3e 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,90 @@
 # PVNet summation
 This project is used for training a model to sum the GSP predictions of [PVNet](https://github.com/openclimatefix/PVNet) into a national estimate.
 
-## Setup
+Using this model to sum the GSP predictions rather than doing a simple sum increases the accuracy of the national predictions and can be configured to produce estimates of the uncertainty range of the national estimate. See the [PVNet](https://github.com/openclimatefix/PVNet) repo for more details and our paper.
+
+
+## Setup / Installation
+
 ```bash
 git clone https://github.com/openclimatefix/PVNet_summation
 cd PVNet_summation
-pip install -r requirements.txt
-pip install git+https://github.com/SheffieldSolar/PV_Live-API
+pip install .
+```
+
+### Additional development dependencies
+
+```bash
+pip install ".[dev]"
+```
+
+## Getting started with running PVNet summation
+
+In order to run PVNet summation, we assume that you are already set up with 
+[PVNet](https://github.com/openclimatefix/PVNet) and have met all the requirements there.
+
+Before running any code, copy the example configuration to a
+configs directory:
+
 ```
+cp -r configs.example configs
+```
+
+You will be making local amendments to these configs.
+
+### Datasets
+
+The datasets required are the same as documented in 
+[PVNet](https://github.com/openclimatefix/PVNet). The only addition is that you will need PVLive 
+data for the national sum i.e. GSP ID 0.
+
+
+## Generating pre-made concurrent batches of data for PVNet
+
+It is required that you preprepare batches using the `save_concurrent_batches.py` script from 
+PVNet. This saves the batches as required by the PVNet model to make predictions for all GSPs for 
+a single forecast init time. Seen the PVNet package for more details on this.
+
+
+### Set up and config example for batch creation
+
+
+The concurrent batches created in the step above will be augmented with a few additional pieces of
+data required for the summation model. Within your copy of `PVNet_summation/configs` make sure you 
+have replaced all of the items marked with `PLACEHOLDER`
+
+### Training PVNet_summation
+
+How PVNet_summation is run is determined by the extensive configuration in the config files. The 
+configs stored in `PVNet/configs.example` should work with batches created using the steps and 
+batch creation config mentioned above.
+
+Make sure to update the following config files before training your model:
+
+1. In `configs/datamodule/default.yaml`:
+    - update `batch_dir` to point to the directory you stored your concurrent batches in during 
+      batch creation.
+    - update `gsp_zarr_path` to point to the PVLive data containing the national estimate
+2. In `configs/model/default.yaml`:
+    - update the PVNet model for which you are training a summation model. A new summation model
+      should be trained for each PVNet model
+    - update the hyperparameters and structure of the summation model
+3. In `configs/trainer/default.yaml`:
+    - set `accelerator: cpu` if running on a system without a supported GPU
+4. In `configs.config.yaml`:
+    - It is recommended that you set `presave_pvnet_outputs` to `True`. This means that the
+      concurrent batches that you create will only be run through the PVNet model once before
+      training and their outputs saved, rather than being run on the fly on each batch throughout 
+      training. This can speed up training significantly.
+
+
+Assuming you have updated the configs, you should now be able to run:
+
+```
+python run.py
+```
+
+
+## Testing
+
+You can use `python -m pytest tests` to run tests
diff --git a/configs.example/callbacks/default.yaml b/configs.example/callbacks/default.yaml
new file mode 100644
index 0000000..26fbda2
--- /dev/null
+++ b/configs.example/callbacks/default.yaml
@@ -0,0 +1,27 @@
+learning_rate_monitor:
+  _target_: lightning.pytorch.callbacks.LearningRateMonitor
+  logging_interval: "epoch"
+
+model_summary:
+  _target_: lightning.pytorch.callbacks.ModelSummary
+  max_depth: 3
+
+model_checkpoint:
+  _target_: lightning.pytorch.callbacks.ModelCheckpoint
+  # name of the logged metric which determines when model is improving
+  monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
+  mode: "min" # can be "max" or "min"
+  save_top_k: 1 # save k best models (determined by above metric)
+  save_last: True # additionally always save model from last epoch
+  every_n_epochs: 1
+  verbose: False
+  filename: "epoch={epoch}-step={step}"
+  dirpath: "PLACEHOLDER/${model_name}"
+  auto_insert_metric_name: False
+  save_on_train_epoch_end: False
+
+#stochastic_weight_averaging:
+#  _target_: pvnet_summation.callbacks.StochasticWeightAveraging
+#  swa_lrs: 0.0000001
+#  swa_epoch_start: 0.8
+#  annealing_epochs: 5
diff --git a/configs.example/config.yaml b/configs.example/config.yaml
new file mode 100644
index 0000000..0cb36a1
--- /dev/null
+++ b/configs.example/config.yaml
@@ -0,0 +1,44 @@
+# @package _global_
+
+# specify here default training configuration
+defaults:
+  - trainer: default.yaml
+  - model: default.yaml
+  - datamodule: default.yaml
+  - callbacks: default.yaml # set this to null if you don't want to use callbacks
+  - logger: wandb.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
+  - hydra: default.yaml
+
+# Whether to loop through the PVNet outputs and save them out before training
+presave_pvnet_outputs:
+  True
+
+  # enable color logging
+#  - override hydra/hydra_logging: colorlog
+#  - override hydra/job_logging: colorlog
+
+# path to original working directory
+# hydra hijacks working directory by changing it to the current log directory,
+# so it's useful to have this path as a special variable
+# learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
+work_dir: ${hydra:runtime.cwd}
+
+model_name: "default"
+
+# use `python run.py debug=true` for easy debugging!
+# this will run 1 train, val and test loop with only 1 batch
+# equivalent to running `python run.py trainer.fast_dev_run=true`
+# (this is placed here just for easier access from command line)
+debug: False
+
+# pretty print config at the start of the run using Rich library
+print_config: True
+
+# disable python warnings if they annoy you
+ignore_warnings: True
+
+# check performance on test set, using the best model achieved during training
+# lightning chooses best model based on metric specified in checkpoint callback
+test_after_training: False
+
+seed: 2727831
diff --git a/configs.example/datamodule/default.yaml b/configs.example/datamodule/default.yaml
new file mode 100644
index 0000000..271fecb
--- /dev/null
+++ b/configs.example/datamodule/default.yaml
@@ -0,0 +1,6 @@
+_target_: pvnet_summation.data.datamodule.DataModule
+batch_dir: "PLACEHOLDER"
+gsp_zarr_path: "PLACEHOLDER"
+batch_size: 32
+num_workers: 20
+prefetch_factor: 2
diff --git a/configs.example/hydra/default.yaml b/configs.example/hydra/default.yaml
new file mode 100644
index 0000000..a086d12
--- /dev/null
+++ b/configs.example/hydra/default.yaml
@@ -0,0 +1,12 @@
+# output paths for hydra logs
+run:
+  dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
+sweep:
+  dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
+  subdir: ${hydra.job.num}
+
+# you can set here environment variables that are universal for all users
+# for system specific variables (like data paths) it's better to use .env file!
+job:
+  env_set:
+    EXAMPLE_VAR: "example_value"
diff --git a/configs.example/logger/wandb.yaml b/configs.example/logger/wandb.yaml
new file mode 100644
index 0000000..f01081e
--- /dev/null
+++ b/configs.example/logger/wandb.yaml
@@ -0,0 +1,15 @@
+# https://wandb.ai
+
+wandb:
+  _target_: lightning.pytorch.loggers.wandb.WandbLogger
+  project: "PLACEHOLDER"
+  name: "${model_name}"
+  save_dir: "PLACEHOLDER"
+  offline: False # set True to store all logs only locally
+  id: null # pass correct id to resume experiment!
+  # entity: ""  # set to name of your wandb team or just remove it
+  log_model: False
+  prefix: ""
+  job_type: "train"
+  group: ""
+  tags: []
diff --git a/configs.example/model/default.yaml b/configs.example/model/default.yaml
new file mode 100644
index 0000000..0c05838
--- /dev/null
+++ b/configs.example/model/default.yaml
@@ -0,0 +1,31 @@
+_target_: pvnet_summation.models.model.Model
+
+output_quantiles: null
+
+model_name: "openclimatefix/pvnet_v2"
+model_version: "898630f3f8cd4e8506525d813dd61c6d8de86144"
+
+#--------------------------------------------
+# Tabular network settings
+#--------------------------------------------
+
+output_network:
+  _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
+  _partial_: True
+output_network_kwargs:
+  fc_hidden_features: 128
+  n_res_blocks: 2
+  res_block_layers: 2
+  dropout_frac: 0.0
+predict_difference_from_sum: False
+
+# ----------------------------------------------
+
+optimizer:
+  _target_: pvnet.optimizers.AdamWReduceLROnPlateau
+  lr: 0.0001
+  weight_decay: 0.25
+  amsgrad: True
+  patience: 20
+  factor: 0.1
+  threshold: 0.00
diff --git a/configs.example/readme.md b/configs.example/readme.md
new file mode 100644
index 0000000..13cbbf2
--- /dev/null
+++ b/configs.example/readme.md
@@ -0,0 +1,7 @@
+The following folders hold the configuration files
+
+This idea is copied from
+https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml
+
+run experiments by:
+`python run.py experiment=example_simple `
diff --git a/configs.example/trainer/default.yaml b/configs.example/trainer/default.yaml
new file mode 100644
index 0000000..9eb7b01
--- /dev/null
+++ b/configs.example/trainer/default.yaml
@@ -0,0 +1,15 @@
+_target_: lightning.pytorch.trainer.trainer.Trainer
+
+accelerator: gpu
+devices: auto
+
+min_epochs: null
+max_epochs: 100
+reload_dataloaders_every_n_epochs: 0
+num_sanity_val_steps: 8
+fast_dev_run: false
+
+#accumulate_grad_batches: 4
+#val_check_interval: 800
+#limit_val_batches: 800
+log_every_n_steps: 50

From 114f5d7a1d232e55101c7c7d5910deed24d3b4d6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Jun 2024 12:49:12 +0000
Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 README.md                            | 20 ++++++++++----------
 pvnet_summation/__init__.py          |  2 +-
 pvnet_summation/models/base_model.py |  4 ++--
 scripts/checkpoint_to_huggingface.py |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 767bf3e..f775c60 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ pip install ".[dev]"
 
 ## Getting started with running PVNet summation
 
-In order to run PVNet summation, we assume that you are already set up with 
+In order to run PVNet summation, we assume that you are already set up with
 [PVNet](https://github.com/openclimatefix/PVNet) and have met all the requirements there.
 
 Before running any code, copy the example configuration to a
@@ -34,15 +34,15 @@ You will be making local amendments to these configs.
 
 ### Datasets
 
-The datasets required are the same as documented in 
-[PVNet](https://github.com/openclimatefix/PVNet). The only addition is that you will need PVLive 
+The datasets required are the same as documented in
+[PVNet](https://github.com/openclimatefix/PVNet). The only addition is that you will need PVLive
 data for the national sum i.e. GSP ID 0.
 
 
 ## Generating pre-made concurrent batches of data for PVNet
 
-It is required that you preprepare batches using the `save_concurrent_batches.py` script from 
-PVNet. This saves the batches as required by the PVNet model to make predictions for all GSPs for 
+It is required that you preprepare batches using the `save_concurrent_batches.py` script from
+PVNet. This saves the batches as required by the PVNet model to make predictions for all GSPs for
 a single forecast init time. Seen the PVNet package for more details on this.
 
 
@@ -50,19 +50,19 @@ a single forecast init time. Seen the PVNet package for more details on this.
 
 
 The concurrent batches created in the step above will be augmented with a few additional pieces of
-data required for the summation model. Within your copy of `PVNet_summation/configs` make sure you 
+data required for the summation model. Within your copy of `PVNet_summation/configs` make sure you
 have replaced all of the items marked with `PLACEHOLDER`
 
 ### Training PVNet_summation
 
-How PVNet_summation is run is determined by the extensive configuration in the config files. The 
-configs stored in `PVNet/configs.example` should work with batches created using the steps and 
+How PVNet_summation is run is determined by the extensive configuration in the config files. The
+configs stored in `PVNet_summation/configs.example` should work with batches created using the steps and
 batch creation config mentioned above.
 
 Make sure to update the following config files before training your model:
 
 1. In `configs/datamodule/default.yaml`:
-    - update `batch_dir` to point to the directory you stored your concurrent batches in during 
+    - update `batch_dir` to point to the directory you stored your concurrent batches in during
       batch creation.
     - update `gsp_zarr_path` to point to the PVLive data containing the national estimate
 2. In `configs/model/default.yaml`:
@@ -74,7 +74,7 @@ Make sure to update the following config files before training your model:
 4. In `configs.config.yaml`:
     - It is recommended that you set `presave_pvnet_outputs` to `True`. This means that the
       concurrent batches that you create will only be run through the PVNet model once before
-      training and their outputs saved, rather than being run on the fly on each batch throughout 
+      training and their outputs saved, rather than being run on the fly on each batch throughout
       training. This can speed up training significantly.
 
 
diff --git a/pvnet_summation/__init__.py b/pvnet_summation/__init__.py
index ff51c01..7ef08ab 100644
--- a/pvnet_summation/__init__.py
+++ b/pvnet_summation/__init__.py
@@ -1,2 +1,2 @@
 """PVNet_summation"""
-__version__ = "0.1.4"
\ No newline at end of file
+__version__ = "0.1.4"
diff --git a/pvnet_summation/models/base_model.py b/pvnet_summation/models/base_model.py
index caec670..5629bc0 100644
--- a/pvnet_summation/models/base_model.py
+++ b/pvnet_summation/models/base_model.py
@@ -91,7 +91,7 @@ def __init__(
             )
         else:
             self.pvnet_output_shape = (317, self.pvnet_model.forecast_len)
-        
+
         self.use_weighted_loss = False
 
     def predict_pvnet_batch(self, batch):
@@ -186,7 +186,7 @@ def validation_step(self, batch: dict, batch_idx):
 
         losses = self._calculate_common_losses(y, y_hat)
         losses.update(self._calculate_val_losses(y, y_hat))
-        
+
         # Store these to make horizon accuracy plot
         self._horizon_maes.append(
             {i: losses[f"MAE_horizon/step_{i:03}"].cpu().numpy() for i in range(self.forecast_len)}
diff --git a/scripts/checkpoint_to_huggingface.py b/scripts/checkpoint_to_huggingface.py
index 9004762..71a5e3e 100644
--- a/scripts/checkpoint_to_huggingface.py
+++ b/scripts/checkpoint_to_huggingface.py
@@ -1,5 +1,5 @@
 """Command line tool to push locally save model checkpoints to huggingface
-    
+
 use:
 python checkpoint_to_huggingface.py "path/to/model/checkpoints" \
     --local-path="~/tmp/this_model" \

From b646970f26d148253f28ebd0f7d082de00919c9a Mon Sep 17 00:00:00 2001
From: James <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 13:53:47 +0100
Subject: [PATCH 5/7] update workflow

---
 .github/workflows/release.yaml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index b84ecc0..11aedeb 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,13 +1,16 @@
-name: Bump version and auto-release
+name: Python Bump Version & release
 
 on:
   push:
     branches:
       - main
+    paths-ignore:
+      - "configs.example/**" # ignores all files in configs.example
+      - "**/README.md" # ignores all README files
 
 jobs:
   release:
-    uses: openclimatefix/.github/.github/workflows/python-release.yml@v1.7.2
+    uses: openclimatefix/.github/.github/workflows/python-release.yml@main
     secrets:
       token: ${{ secrets.PYPI_API_TOKEN }}
       PAT_TOKEN: ${{ secrets.PAT_TOKEN }}

From 83c7e2b3eee180c8768eb44c0a4c561b4957db62 Mon Sep 17 00:00:00 2001
From: James <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 14:07:51 +0100
Subject: [PATCH 6/7] update test workflow

---
 .github/workflows/{workflows.yaml => test.yaml} | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
 rename .github/workflows/{workflows.yaml => test.yaml} (80%)

diff --git a/.github/workflows/workflows.yaml b/.github/workflows/test.yaml
similarity index 80%
rename from .github/workflows/workflows.yaml
rename to .github/workflows/test.yaml
index 28f1ff1..53c4e1b 100644
--- a/.github/workflows/workflows.yaml
+++ b/.github/workflows/test.yaml
@@ -2,6 +2,8 @@ name: Python package tests
 
 on:
   push:
+  pull_request:
+    types: [opened, reopened]
   schedule:
     - cron: "0 12 * * 1"
 jobs:
@@ -11,9 +13,10 @@ jobs:
       # 0 means don't use pytest-xdist
       pytest_numcpus: "4"
       # pytest-cov looks at this folder
-      pytest_cov_dir: "pvnet_summation"
+      pytest_cov_dir: "pvnet"
       # extra things to install
       sudo_apt_install: "libgeos++-dev libproj-dev proj-data proj-bin"
       #      brew_install: "proj geos librttopo"
       os_list: '["ubuntu-latest"]'
       python-version: "['3.10', '3.11']"
+      extra_commands: "pip3 install -e '.[all]'"

From 68f8495ac0e101d39cffc9b31d6b23d4d20504ff Mon Sep 17 00:00:00 2001
From: James <djamesfulton@yahoo.co.uk>
Date: Tue, 18 Jun 2024 14:29:47 +0100
Subject: [PATCH 7/7] update for numpy==2

---
 pvnet_summation/models/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pvnet_summation/models/model.py b/pvnet_summation/models/model.py
index 18ae98e..96528a5 100644
--- a/pvnet_summation/models/model.py
+++ b/pvnet_summation/models/model.py
@@ -59,7 +59,7 @@ def __init__(
             output_network_kwargs = dict()
 
         self.model = output_network(
-            in_features=np.product(self.pvnet_output_shape),
+            in_features=np.prod(self.pvnet_output_shape),
             out_features=self.num_output_features,
             **output_network_kwargs,
         )