From 7e7ec46f7fa7c1187383ba632d6056c26a98114b Mon Sep 17 00:00:00 2001 From: Nick Harder Date: Mon, 16 Dec 2024 13:16:15 +0100 Subject: [PATCH] -fix tests -avoid division by zero -relocate prepare_observations function to the base class --- .../learning_unit_operator.py | 8 +-- .../reinforcement_learning/learning_utils.py | 3 + assume/strategies/learning_advanced_orders.py | 2 +- assume/strategies/learning_strategies.py | 59 ++++++++----------- docs/source/release_notes.rst | 2 +- tests/test_rl_strategies.py | 2 + 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/assume/reinforcement_learning/learning_unit_operator.py b/assume/reinforcement_learning/learning_unit_operator.py index 4d9799f8..0477279b 100644 --- a/assume/reinforcement_learning/learning_unit_operator.py +++ b/assume/reinforcement_learning/learning_unit_operator.py @@ -77,10 +77,10 @@ def add_unit( self.rl_units.append(unit) - #prepare scaled foecasts for the RL staretgy as observations - - - unit.bidding_strategies[market.market_id].prepare_observations(unit, market.market_id) + # prepare scaled forecasts for the RL strategy as observations + unit.bidding_strategies[market.market_id].prepare_observations( + unit, market.market_id + ) break def handle_market_feedback(self, content: ClearingMessage, meta: MetaDict) -> None: diff --git a/assume/reinforcement_learning/learning_utils.py b/assume/reinforcement_learning/learning_utils.py index a0a1d7ae..1de9c2e8 100644 --- a/assume/reinforcement_learning/learning_utils.py +++ b/assume/reinforcement_learning/learning_utils.py @@ -113,6 +113,9 @@ def min_max_scale(x, min_val: float, max_val: float): min_val: minimum value of the parameter max_val: maximum value of the parameter """ + # Avoid division by zero + if min_val == max_val: + return x return (x - min_val) / (max_val - min_val) diff --git a/assume/strategies/learning_advanced_orders.py b/assume/strategies/learning_advanced_orders.py index 0e009637..ca625c31 100644 --- 
a/assume/strategies/learning_advanced_orders.py +++ b/assume/strategies/learning_advanced_orders.py @@ -278,7 +278,7 @@ def create_observation( # residual load forecast upper_scaling_factor_res_load = self.max_residual lower_scaling_factor_res_load = self.min_residual - + # price forecast upper_scaling_factor_price = self.max_market_price lower_scaling_factor_price = self.min_market_price diff --git a/assume/strategies/learning_strategies.py b/assume/strategies/learning_strategies.py index b6f0fdaa..6087c358 100644 --- a/assume/strategies/learning_strategies.py +++ b/assume/strategies/learning_strategies.py @@ -53,6 +53,25 @@ def load_actor_params(self, load_path): self.actor_target.eval() self.actor.optimizer.load_state_dict(params["actor_optimizer"]) + def prepare_observations(self, unit, market_id): + # scaling factors for the observations + upper_scaling_factor_res_load = max(unit.forecaster[f"residual_load_{market_id}"]) + lower_scaling_factor_res_load = min(unit.forecaster[f"residual_load_{market_id}"]) + upper_scaling_factor_price = max(unit.forecaster[f"price_{market_id}"]) + lower_scaling_factor_price = min(unit.forecaster[f"price_{market_id}"]) + + self.scaled_res_load_obs = min_max_scale( + unit.forecaster[f"residual_load_{market_id}"], + lower_scaling_factor_res_load, + upper_scaling_factor_res_load, + ) + + self.scaled_pices_obs = min_max_scale( + unit.forecaster[f"price_{market_id}"], + lower_scaling_factor_price, + upper_scaling_factor_price, + ) + class RLStrategy(AbstractLearningStrategy): """ @@ -142,7 +161,6 @@ def __init__(self, *args, **kwargs): self.algorithm = kwargs.get("algorithm", "matd3") actor_architecture = kwargs.get("actor_architecture", "mlp") - if actor_architecture in actor_architecture_aliases.keys(): self.actor_architecture_class = actor_architecture_aliases[ actor_architecture ] @@ -187,27 +205,6 @@ def __init__(self, *args, **kwargs): raise FileNotFoundError( f"No policies were provided for DRL unit {self.unit_id}!. 
Please provide a valid path to the trained policies." ) - - def prepare_observations(self, unit, market_id): - - # scaling factors for the observations - upper_scaling_factor_res_load = max(unit.forecaster[f"price_{market_id}"]) - lower_scaling_factor_res_load = min(unit.forecaster[f"price_{market_id}"]) - upper_scaling_factor_price = max(unit.forecaster[f"residualy_load_{market_id}"]) - lower_scaling_factor_price = min(unit.forecaster[f"residual_load_{market_id}"]) - - - self.scaled_res_load_obs = min_max_scale( - unit.forecaster[f"residual_load_{market_id}"], - lower_scaling_factor_res_load, - upper_scaling_factor_res_load, - ) - - self.scaled_pices_obs = min_max_scale( - unit.forecaster[f"price_{market_id}"], - lower_scaling_factor_price, - upper_scaling_factor_price, - ) def calculate_bids( self, @@ -424,12 +421,9 @@ def create_observation( # checks if we are at end of simulation horizon, since we need to change the forecast then # for residual load and price forecast and scale them - if ( - end_excl + forecast_len - > self.scaled_res_load_obs.index[-1] - ): + if end_excl + forecast_len > self.scaled_res_load_obs.index[-1]: scaled_res_load_forecast = self.scaled_res_load_obs.loc[start:] - + scaled_res_load_forecast = np.concatenate( [ scaled_res_load_forecast, @@ -441,9 +435,9 @@ def create_observation( else: scaled_res_load_forecast = self.scaled_res_load_obs.loc[ - start : end_excl + forecast_len - ] - + start : end_excl + forecast_len + ] + if end_excl + forecast_len > self.scaled_pices_obs.index[-1]: scaled_price_forecast = self.scaled_pices_obs.loc[start:] scaled_price_forecast = np.concatenate( @@ -457,9 +451,8 @@ def create_observation( else: scaled_price_forecast = self.scaled_pices_obs.loc[ - start : end_excl + forecast_len - ] - + start : end_excl + forecast_len + ] # get last accepted bid volume and the current marginal costs of the unit current_volume = unit.get_output_before(start) diff --git a/docs/source/release_notes.rst 
b/docs/source/release_notes.rst index f4153526..67bee9fe 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -39,7 +39,7 @@ v0.5.0 - (10th December 2024) - **Overall Performance Optimization:** The overall performance of the framework has been improved by a factor of 5x to 12x depending on the size of the simulation (number of units, markets, and time steps). - **Learning Opservation Space Scaling:** Instead of the formerly used max sclaing of the observation space, we added a min-max scaling to the observation space. - This allows for a more robust scaling of the observation space for furture analysis. + This allows for a more robust scaling of the observation space for future analysis. **Bugfixes:** - **Tutorials**: General fixes of the tutorials, to align with updated functionalitites of Assume diff --git a/tests/test_rl_strategies.py b/tests/test_rl_strategies.py index c91da41b..c645237b 100644 --- a/tests/test_rl_strategies.py +++ b/tests/test_rl_strategies.py @@ -91,6 +91,7 @@ def test_learning_strategies(mock_market_config, power_plant_mcp): ] strategy = power_plant_mcp.bidding_strategies["EOM"] + strategy.prepare_observations(power_plant_mcp, mc.market_id) bids = strategy.calculate_bids(power_plant_mcp, mc, product_tuples=product_tuples) assert len(bids) == 2 @@ -123,6 +124,7 @@ def test_lstm_learning_strategies(mock_market_config, power_plant_lstm): ] strategy = power_plant_lstm.bidding_strategies["EOM"] + strategy.prepare_observations(power_plant_lstm, mc.market_id) bids = strategy.calculate_bids(power_plant_lstm, mc, product_tuples=product_tuples) assert len(bids) == 2