From d5bac61b47063f92dbe6c2b85d6b0bd54202ad4b Mon Sep 17 00:00:00 2001
From: peterdudfield
Date: Tue, 12 Dec 2023 15:32:59 +0000
Subject: [PATCH] refactor into metrics file

---
 quartz_solar_forecast/eval/__init__.py |  0
 quartz_solar_forecast/eval/metrics.py  | 29 ++++++++++++++++++++++++++
 quartz_solar_forecast/evaluation.py    |  6 +++---
 tests/eval/test_metrics.py             | 20 ++++++++++++++++++
 4 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 quartz_solar_forecast/eval/__init__.py
 create mode 100644 quartz_solar_forecast/eval/metrics.py
 create mode 100644 tests/eval/test_metrics.py

diff --git a/quartz_solar_forecast/eval/__init__.py b/quartz_solar_forecast/eval/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/quartz_solar_forecast/eval/metrics.py b/quartz_solar_forecast/eval/metrics.py
new file mode 100644
index 00000000..94e9ec05
--- /dev/null
+++ b/quartz_solar_forecast/eval/metrics.py
@@ -0,0 +1,29 @@
+def metrics(results_df):
+    """
+    Calculate and print metrics: MAE
+
+    results_df dataframe with the following columns
+    - timestamp
+    - pv_id
+    - horizon_hours
+    - forecast_power
+    - generation_power
+
+    """
+
+    mae = (results_df["forecast_power"] - results_df['generation_power']).abs().mean()
+    print(f"MAE: {mae}")
+
+    # calculate metrics over the different horizons hours
+    # find all unique horizon_hours
+    horizon_hours = results_df["horizon_hours"].unique()
+    for horizon_hour in horizon_hours:
+        # filter results_df to only include the horizon_hour
+        results_df_horizon = results_df[results_df["horizon_hours"] == horizon_hour]
+        mae = (results_df_horizon["forecast_power"] - results_df_horizon['generation_power']).abs().mean()
+        print(f"MAE for horizon {horizon_hour}: {mae}")
+
+    # TODO add more metrics using ocf_ml_metrics
+
+
+
diff --git a/quartz_solar_forecast/evaluation.py b/quartz_solar_forecast/evaluation.py
index a79104b4..8a03a00a 100644
--- a/quartz_solar_forecast/evaluation.py
+++ b/quartz_solar_forecast/evaluation.py
@@ -5,6 +5,7 @@
 
 This contains 50 sites each with 50 timestamps to make 2500 samples in total.
 """
+from quartz_solar_forecast.eval.metrics import metrics
 from quartz_solar_forecast.eval.nwp import get_nwp
 from quartz_solar_forecast.eval.forecast import run_forecast
 from quartz_solar_forecast.eval.utils import combine_forecast_ground_truth
@@ -35,9 +36,8 @@ def run_eval(testset_path):
     results_df.to_csv("results.csv")
 
     # Calculate and print metrics: MAE
-    mae = (results_df["forecast_power"] - results_df['generation_power']).abs().mean()
-    print(f"MAE: {mae}")
-    # TODO: add more metrics using ocf_ml_metrics
+    metrics(results_df)
 
     # Visulisations
 
+
diff --git a/tests/eval/test_metrics.py b/tests/eval/test_metrics.py
new file mode 100644
index 00000000..442cacc6
--- /dev/null
+++ b/tests/eval/test_metrics.py
@@ -0,0 +1,20 @@
+from quartz_solar_forecast.eval.metrics import metrics
+import pandas as pd
+import numpy as np
+
+
+def test_metrics():
+
+    # create a fake dataframe
+
+    results_df = pd.DataFrame(
+        columns=[
+            "id",
+            "timestamp",
+            "horizon_hours",
+            "forecast_power",
+            "generation_power",
+        ], data=np.random.random((100,5)))
+
+    # call the metrics function
+    metrics(results_df)
\ No newline at end of file
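
For reviewers trying the change locally, here is a minimal usage sketch of the new quartz_solar_forecast.eval.metrics.metrics helper. It is not part of the commit: the column names follow the docstring in the patch, while the toy dataframe, its sizes, and the groupby variant at the end are illustrative assumptions.

import numpy as np
import pandas as pd

from quartz_solar_forecast.eval.metrics import metrics

# Toy results dataframe with the columns metrics() expects (values are random).
results_df = pd.DataFrame(
    {
        "pv_id": np.repeat([1, 2], 24),
        "timestamp": pd.date_range("2023-12-12", periods=48, freq="h"),
        "horizon_hours": np.tile(np.arange(24), 2),
        "forecast_power": np.random.random(48),
        "generation_power": np.random.random(48),
    }
)

# Prints the overall MAE and one MAE per horizon_hours value.
metrics(results_df)

# The per-horizon loop inside metrics() computes the same numbers as this groupby:
mae_by_horizon = (
    (results_df["forecast_power"] - results_df["generation_power"])
    .abs()
    .groupby(results_df["horizon_hours"])
    .mean()
)
print(mae_by_horizon)

The groupby form returns the per-horizon MAE as a Series rather than printing it, which may be convenient if the TODO about returning more metrics via ocf_ml_metrics is picked up later.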