From 7e7ec46f7fa7c1187383ba632d6056c26a98114b Mon Sep 17 00:00:00 2001 From: Nick Harder Date: Mon, 16 Dec 2024 13:16:15 +0100 Subject: [PATCH] -fix tests -avoid division by zero -relocate prepare_observations function to the base class --- .../learning_unit_operator.py | 8 +-- .../reinforcement_learning/learning_utils.py | 3 + assume/strategies/learning_advanced_orders.py | 2 +- assume/strategies/learning_strategies.py | 59 ++++++++----------- docs/source/release_notes.rst | 2 +- tests/test_rl_strategies.py | 2 + 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/assume/reinforcement_learning/learning_unit_operator.py b/assume/reinforcement_learning/learning_unit_operator.py index 4d9799f8..0477279b 100644 --- a/assume/reinforcement_learning/learning_unit_operator.py +++ b/assume/reinforcement_learning/learning_unit_operator.py @@ -77,10 +77,10 @@ def add_unit( self.rl_units.append(unit) - #prepare scaled foecasts for the RL staretgy as observations - - - unit.bidding_strategies[market.market_id].prepare_observations(unit, market.market_id) + # prepare scaled forecasts for the RL strategy as observations + unit.bidding_strategies[market.market_id].prepare_observations( + unit, market.market_id + ) break def handle_market_feedback(self, content: ClearingMessage, meta: MetaDict) -> None: diff --git a/assume/reinforcement_learning/learning_utils.py b/assume/reinforcement_learning/learning_utils.py index a0a1d7ae..1de9c2e8 100644 --- a/assume/reinforcement_learning/learning_utils.py +++ b/assume/reinforcement_learning/learning_utils.py @@ -113,6 +113,9 @@ def min_max_scale(x, min_val: float, max_val: float): min_val: minimum value of the parameter max_val: maximum value of the parameter """ + # Avoid division by zero + if min_val == max_val: + return x return (x - min_val) / (max_val - min_val) diff --git a/assume/strategies/learning_advanced_orders.py b/assume/strategies/learning_advanced_orders.py index 0e009637..ca625c31 100644 --- 
a/assume/strategies/learning_advanced_orders.py +++ b/assume/strategies/learning_advanced_orders.py @@ -278,7 +278,7 @@ def create_observation( # residual load forecast upper_scaling_factor_res_load = self.max_residual lower_scaling_factor_res_load = self.min_residual - + # price forecast upper_scaling_factor_price = self.max_market_price lower_scaling_factor_price = self.min_market_price diff --git a/assume/strategies/learning_strategies.py b/assume/strategies/learning_strategies.py index b6f0fdaa..6087c358 100644 --- a/assume/strategies/learning_strategies.py +++ b/assume/strategies/learning_strategies.py @@ -53,6 +53,25 @@ def load_actor_params(self, load_path): self.actor_target.eval() self.actor.optimizer.load_state_dict(params["actor_optimizer"]) + def prepare_observations(self, unit, market_id): + # scaling factors for the observations + upper_scaling_factor_res_load = max(unit.forecaster[f"residual_load_{market_id}"]) + lower_scaling_factor_res_load = min(unit.forecaster[f"residual_load_{market_id}"]) + upper_scaling_factor_price = max(unit.forecaster[f"price_{market_id}"]) + lower_scaling_factor_price = min(unit.forecaster[f"price_{market_id}"]) + + self.scaled_res_load_obs = min_max_scale( + unit.forecaster[f"residual_load_{market_id}"], + lower_scaling_factor_res_load, + upper_scaling_factor_res_load, + ) + + self.scaled_pices_obs = min_max_scale( + unit.forecaster[f"price_{market_id}"], + lower_scaling_factor_price, + upper_scaling_factor_price, + ) + class RLStrategy(AbstractLearningStrategy): """ @@ -142,7 +161,6 @@ def __init__(self, *args, **kwargs): self.algorithm = kwargs.get("algorithm", "matd3") actor_architecture = kwargs.get("actor_architecture", "mlp") - if actor_architecture in actor_architecture_aliases.keys(): self.actor_architecture_class = actor_architecture_aliases[ actor_architecture ] @@ -187,27 +205,6 @@ def __init__(self, *args, **kwargs): raise FileNotFoundError( f"No policies were provided for DRL unit {self.unit_id}!. 
Please provide a valid path to the trained policies." ) - - def prepare_observations(self, unit, market_id): - - # scaling factors for the observations - upper_scaling_factor_res_load = max(unit.forecaster[f"price_{market_id}"]) - lower_scaling_factor_res_load = min(unit.forecaster[f"price_{market_id}"]) - upper_scaling_factor_price = max(unit.forecaster[f"residualy_load_{market_id}"]) - lower_scaling_factor_price = min(unit.forecaster[f"residual_load_{market_id}"]) - - - self.scaled_res_load_obs = min_max_scale( - unit.forecaster[f"residual_load_{market_id}"], - lower_scaling_factor_res_load, - upper_scaling_factor_res_load, - ) - - self.scaled_pices_obs = min_max_scale( - unit.forecaster[f"price_{market_id}"], - lower_scaling_factor_price, - upper_scaling_factor_price, - ) def calculate_bids( self, @@ -424,12 +421,9 @@ def create_observation( # checks if we are at end of simulation horizon, since we need to change the forecast then # for residual load and price forecast and scale them - if ( - end_excl + forecast_len - > self.scaled_res_load_obs.index[-1] - ): + if end_excl + forecast_len > self.scaled_res_load_obs.index[-1]: scaled_res_load_forecast = self.scaled_res_load_obs.loc[start:] - + scaled_res_load_forecast = np.concatenate( [ scaled_res_load_forecast, @@ -441,9 +435,9 @@ def create_observation( else: scaled_res_load_forecast = self.scaled_res_load_obs.loc[ - start : end_excl + forecast_len - ] - + start : end_excl + forecast_len + ] + if end_excl + forecast_len > self.scaled_pices_obs.index[-1]: scaled_price_forecast = self.scaled_pices_obs.loc[start:] scaled_price_forecast = np.concatenate( @@ -457,9 +451,8 @@ def create_observation( else: scaled_price_forecast = self.scaled_pices_obs.loc[ - start : end_excl + forecast_len - ] - + start : end_excl + forecast_len + ] # get last accepted bid volume and the current marginal costs of the unit current_volume = unit.get_output_before(start) diff --git a/docs/source/release_notes.rst 
b/docs/source/release_notes.rst index f4153526..67bee9fe 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -39,7 +39,7 @@ v0.5.0 - (10th December 2024) - **Overall Performance Optimization:** The overall performance of the framework has been improved by a factor of 5x to 12x depending on the size of the simulation (number of units, markets, and time steps). - **Learning Opservation Space Scaling:** Instead of the formerly used max sclaing of the observation space, we added a min-max scaling to the observation space. - This allows for a more robust scaling of the observation space for furture analysis. + This allows for a more robust scaling of the observation space for future analysis. **Bugfixes:** - **Tutorials**: General fixes of the tutorials, to align with updated functionalitites of Assume diff --git a/tests/test_rl_strategies.py b/tests/test_rl_strategies.py index c91da41b..c645237b 100644 --- a/tests/test_rl_strategies.py +++ b/tests/test_rl_strategies.py @@ -91,6 +91,7 @@ def test_learning_strategies(mock_market_config, power_plant_mcp): ] strategy = power_plant_mcp.bidding_strategies["EOM"] + strategy.prepare_observations(power_plant_mcp, mc.market_id) bids = strategy.calculate_bids(power_plant_mcp, mc, product_tuples=product_tuples) assert len(bids) == 2 @@ -123,6 +124,7 @@ def test_lstm_learning_strategies(mock_market_config, power_plant_lstm): ] strategy = power_plant_lstm.bidding_strategies["EOM"] + strategy.prepare_observations(power_plant_lstm, mc.market_id) bids = strategy.calculate_bids(power_plant_lstm, mc, product_tuples=product_tuples) assert len(bids) == 2