diff --git a/README.md b/README.md
index 2868ed16..86de7105 100644
--- a/README.md
+++ b/README.md
@@ -60,11 +60,23 @@ All the data is publicly available and the evaluation script can be run with the
 
 ```
 python scripts/run_evaluation.py
 ```
 
-The test dataset we used is defined in `quartz_solar_forecast/dataset/testset.csv'.
+The test dataset we used is defined in `quartz_solar_forecast/dataset/testset.csv`.
+This contains 50 PV sites, each with 50 unique timestamps. The data is from 2021.
 
-The results of the evaluation are shown below:
-TODO
+The results of the evaluation are as follows:
+The MAE is 0.1906 kW across all horizons.
+
+| Horizons | MAE [kW]      |
+|----------|---------------|
+| 0        | 0.202 +- 0.03 |
+| 1        | 0.211 +- 0.03 |
+| 2        | 0.216 +- 0.03 |
+| 3-4      | 0.211 +- 0.02 |
+| 5-8      | 0.191 +- 0.01 |
+| 9-16     | 0.161 +- 0.01 |
+| 17-24    | 0.173 +- 0.01 |
+| 24-48    | 0.201 +- 0.01 |
+
diff --git a/quartz_solar_forecast/eval/metrics.py b/quartz_solar_forecast/eval/metrics.py
index b71766f1..496ce2d1 100644
--- a/quartz_solar_forecast/eval/metrics.py
+++ b/quartz_solar_forecast/eval/metrics.py
@@ -42,4 +42,28 @@ def metrics(results_df: pd.DataFrame):
 
     print(f"MAE for horizon {horizon_hour}: {mae} +- {1.96*sem}")
 
-    # TODO add more metrics using ocf_ml_metrics
+    # calculate metrics over the different horizon groups
+    horizon_groups = [[0, 0], [1, 1], [2, 2], [3, 4], [5, 8], [9, 16], [17, 24], [24, 48]]
+    for horizon_group in horizon_groups:
+        horizon_group_df = results_df[
+            results_df["horizon_hour"].between(horizon_group[0], horizon_group[1])
+        ]
+        # mean absolute error over the horizon group
+        mae = np.round(
+            (horizon_group_df["forecast_power"] - horizon_group_df["generation_power"])
+            .abs()
+            .mean(),
+            3,
+        )
+        # standard error of the mean; 1.96 * sem gives an approximate 95% confidence interval
+        sem = np.round(
+            (
+                (horizon_group_df["forecast_power"] - horizon_group_df["generation_power"])
+                .abs()
+                .std()
+                / len(horizon_group_df) ** 0.5
+            ),
+            3,
+        )
+
+        print(f"MAE for horizon {horizon_group}: {mae} +- {1.96*sem}")
+
+    # TODO add more metrics using ocf_ml_metrics
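
For reference, here is a minimal, self-contained sketch of the grouped-MAE calculation this diff introduces, run against synthetic data. The column names (`horizon_hour`, `forecast_power`, `generation_power`) and the horizon groups match the diff above; the toy DataFrame and its noise model are made up purely for illustration.

```python
import numpy as np
import pandas as pd

# Toy results table: 500 random rows with the same columns metrics() expects.
rng = np.random.default_rng(42)
n = 500
results_df = pd.DataFrame(
    {
        "horizon_hour": rng.integers(0, 49, size=n),  # horizons 0..48 hours
        "generation_power": rng.uniform(0.0, 4.0, size=n),  # kW
    }
)
# Forecast = generation plus Gaussian noise, so the MAE is non-trivial.
results_df["forecast_power"] = (
    results_df["generation_power"] + rng.normal(0.0, 0.2, size=n)
)

horizon_groups = [[0, 0], [1, 1], [2, 2], [3, 4], [5, 8], [9, 16], [17, 24], [24, 48]]
for low, high in horizon_groups:
    group = results_df[results_df["horizon_hour"].between(low, high)]
    abs_err = (group["forecast_power"] - group["generation_power"]).abs()
    mae = abs_err.mean()
    # Standard error of the mean; 1.96 * SEM is an approximate 95% CI half-width.
    sem = abs_err.std() / len(group) ** 0.5
    print(f"MAE for horizon {[low, high]}: {mae:.3f} +- {1.96 * sem:.3f}")
```

Note that pandas' `between` is inclusive on both ends, so hour 24 falls into both the 17-24 and the 24-48 groups; this matches the boundaries used in `metrics.py` and the README table above.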