diff --git a/assume/common/base.py b/assume/common/base.py index b1716853..78e0ef44 100644 --- a/assume/common/base.py +++ b/assume/common/base.py @@ -149,6 +149,7 @@ def set_dispatch_plan( for order in orderbook: start = order["start_time"] end = order["end_time"] + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq if isinstance(order["accepted_volume"], dict): added_volume = list(order["accepted_volume"].values()) @@ -256,6 +257,7 @@ def calculate_cashflow(self, product_type: str, orderbook: Orderbook): for order in orderbook: start = order["start_time"] end = order["end_time"] + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq if isinstance(order["accepted_volume"], dict): diff --git a/assume/strategies/flexable.py b/assume/strategies/flexable.py index 2d0b8494..10ba7fcd 100644 --- a/assume/strategies/flexable.py +++ b/assume/strategies/flexable.py @@ -595,6 +595,7 @@ def calculate_reward_EOM( for order in orderbook: start = order["start_time"] + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = order["end_time"] - unit.index.freq order_times = unit.index[start:end_excl] diff --git a/assume/strategies/flexable_storage.py b/assume/strategies/flexable_storage.py index 8bb5a590..7ad0f8e3 100644 --- a/assume/strategies/flexable_storage.py +++ b/assume/strategies/flexable_storage.py @@ -201,6 +201,7 @@ def calculate_reward( for order in orderbook: start = order["start_time"] + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = order["end_time"] - unit.index.freq # Extract outputs and costs in one step diff --git a/assume/strategies/learning_advanced_orders.py b/assume/strategies/learning_advanced_orders.py index a721dc53..a66da20b 100644 --- a/assume/strategies/learning_advanced_orders.py +++ b/assume/strategies/learning_advanced_orders.py @@ -255,6 +255,7 @@ def create_observation( The scaling factors are defined by the maximum residual load, the maximum bid price and the maximum capacity of the unit. """ + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - unit.index.freq # get the forecast length depending on the time unit considered in the modelled unit diff --git a/assume/strategies/learning_strategies.py b/assume/strategies/learning_strategies.py index 4f9d949b..d41dd28b 100644 --- a/assume/strategies/learning_strategies.py +++ b/assume/strategies/learning_strategies.py @@ -388,6 +388,7 @@ def create_observation( the total capacity and marginal cost, scaled by maximum power and bid price, respectively. """ + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - unit.index.freq # get the forecast length depending on the tme unit considered in the modelled unit @@ -521,6 +522,7 @@ def calculate_reward( for order in orderbook: start = order["start_time"] end = order["end_time"] + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - unit.index.freq # depending on way the unit calculates marginal costs we take costs @@ -992,6 +994,7 @@ def create_observation( the agent's action selection. 
""" + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - unit.index.freq # get the forecast length depending on the tme unit considered in the modelled unit diff --git a/assume/units/demand.py b/assume/units/demand.py index 5a50edfa..37a02684 100644 --- a/assume/units/demand.py +++ b/assume/units/demand.py @@ -101,6 +101,8 @@ def calculate_min_max_power( Returns: tuple[pandas.Series, pandas.Series]: The bid colume as both the minimum and maximum power output of the unit. """ + + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq bid_volume = (self.volume - self.outputs[product_type]).loc[start:end_excl] diff --git a/assume/units/powerplant.py b/assume/units/powerplant.py index 11905ac7..5ab7fbff 100644 --- a/assume/units/powerplant.py +++ b/assume/units/powerplant.py @@ -260,6 +260,7 @@ def calculate_min_max_power( Note: The calculation does not include ramping constraints and can be used for arbitrary start times in the future. """ + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq base_load = self.outputs["energy"].loc[start:end_excl] diff --git a/assume/units/storage.py b/assume/units/storage.py index c9b039c0..275fed3e 100644 --- a/assume/units/storage.py +++ b/assume/units/storage.py @@ -300,6 +300,7 @@ def calculate_min_max_charge( Returns: tuple[np.array, np.array]: The minimum and maximum charge power levels of the storage unit in MW. """ + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq base_load = self.outputs["energy"].loc[start:end_excl] @@ -339,6 +340,7 @@ def calculate_min_max_discharge( Returns: tuple[np.array, np.array]: The minimum and maximum discharge power levels of the storage unit in MW. """ + # end includes the end of the last product, to get the last products' start time we deduct the frequency once end_excl = end - self.index.freq base_load = self.outputs["energy"].loc[start:end_excl] diff --git a/docs/source/installation.rst b/docs/source/installation.rst index b3c0c986..4305d6d3 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -82,13 +82,13 @@ ASSUME uses `argcomplete` for argument completion on the CLI. On Windows, one needs to run: -`register-python-argcomplete --shell powershell assume | Out-String | Invoke-Expression` + register-python-argcomplete --shell powershell assume | Out-String | Invoke-Expression in the used conda environment, to install tab completions. On Bash or zsh (Linux and Mac) run the following in the correct conda environment with assume and argcomplete installed: -`eval "$(register-python-argcomplete assume)"` + eval "$(register-python-argcomplete assume)" Install using Docker diff --git a/docs/source/learning.rst b/docs/source/learning.rst index c1b59ef2..33675550 100644 --- a/docs/source/learning.rst +++ b/docs/source/learning.rst @@ -98,8 +98,8 @@ The main idea is to use a centralized critic during the training phase, which ha Changes in state transitions and rewards can be explained by the actions of other agents. Meanwhile, during both training and execution, the actor has access only to its local observations :math:`o_i` derived from the entire state :math:`S`. 
-For each agent :math:`i`, we train two centralized critics :math:`Q_{i,\theta_1,2}(S, a_1, \ldots, a_N)` together with two target critic networks. -Similar to TD3, the smaller value of the two critics and target action noise :math:`a_i,k \sim` is used to calculate the target :math:`y_i,k`. +For each agent :math:`i`, we train not one but two centralized critics :math:`Q_{i,\theta_1,2}(S, a_1, \ldots, a_N)` together with two target critic networks. +Similar to TD3, the smaller value of the two critics and target action noise is used to calculate the target :math:`y_{i,k}`. This is done to address the issue of overestimation bias. .. math:: diff --git a/examples/notebooks/01_minimal_manual_example.ipynb b/examples/notebooks/01_minimal_manual_example.ipynb index 66f54c05..55611867 100644 --- a/examples/notebooks/01_minimal_manual_example.ipynb +++ b/examples/notebooks/01_minimal_manual_example.ipynb @@ -147,7 +147,7 @@ "In this code:\n", "- `marketdesign` is a list containing a single market configuration.\n", "\n", - "- `MarketConfig(...)` defines the configuration for a specific market. In this case, it's named \"EOM\" (End of Month).\n", + "- `MarketConfig(...)` defines the configuration for a specific market. In this case, it's named \"EOM\" (Energy Only Market).\n", "\n", "  - `name=\"EOM\"` - Specifies the name of the market configuration as \"EOM\".\n", "\n", @@ -197,7 +197,7 @@ "source": [ "## Adding Unit Operators and Units\n", "\n", - "After initializing the simulation, and creating a market, we add unit operators and units to the simulation world." + "After initializing the simulation, and creating a market, we add unit operators and units to the simulation world. A **unit** in ASSUME refers to an entity that participates in the market, either buying or selling electricity." ] }, { diff --git a/examples/notebooks/02_automated_run_example.ipynb b/examples/notebooks/02_automated_run_example.ipynb index 69e9a210..57310347 100644 --- a/examples/notebooks/02_automated_run_example.ipynb +++ b/examples/notebooks/02_automated_run_example.ipynb @@ -155,13 +155,13 @@ "\n", "- `emission_factor`: A numerical value representing the amount of CO2 (or equivalent) emissions produced per unit of electricity generated.\n", "\n", - "- `max_power`: The maximum power output each unit can deliver. This is the upper limit of the unit's operational capacity.\n", + "- `max_power`: The maximum power output each unit can deliver. This is the upper limit of the unit's operational capacity, given in MW.\n", "\n", - "- `min_power`: The minimum stable level of power that each unit can produce while remaining operational.\n", + "- `min_power`: The minimum stable level of power that each unit can produce while remaining operational. It is given in MW.\n", "\n", "- `efficiency`: A measure of how effectively each unit converts fuel into electricity. This efficienty represent the final efficiency of converting fuel into electricity.\n", "\n", - "- `additional_cost`: The additional operational costs for each unit, such as maintenance and staffing, expressed in currency units per MWh.\n", + "- `additional_cost`: The additional operational costs for each unit, such as maintenance and staffing, expressed in EUR/MWh.\n", "\n", "- `unit_operator`: The entity responsible for operating each power plant unit. This could be a utility company, a private operator, or another type of organization."
] @@ -253,7 +253,7 @@ "\n", "You might notice, that the column name we use is demand_EOM, which is similar to the name of our demand unit. The framework is designed in such way, that multiple demand units can be defined in the same file. The column name is used to match the demand time series with the correct demand unit. Afterwards, each demand unit following a naive bidding strategy will bid the respective demand value into the market.\n", "\n", - "Also, the length of the demand time series must be at least as long as the simulation time horizon. If the time series is longer than the simulation time horizon, the framework will automatically truncate it to the correct length. If the resolution of the time series is higher than the simulation time step, the framework will automatically resample the time series to match the simulation time step. If it is shorter, an error will be raised." + "Also, the length of the demand time series must be at least as long as the simulation time horizon. If the time series is longer than the simulation time horizon, the framework will automatically truncate it to the correct length. This is demonstrated here by providing a date range of eight days, which will be truncated to one week. If the resolution of the time series is higher than the simulation time step, the framework will automatically resample the time series to match the simulation time step. If it is shorter, an error will be raised." ] }, { diff --git a/examples/notebooks/03_custom_unit_example.ipynb b/examples/notebooks/03_custom_unit_example.ipynb index 697fe8f3..f2240f92 100644 --- a/examples/notebooks/03_custom_unit_example.ipynb +++ b/examples/notebooks/03_custom_unit_example.ipynb @@ -253,6 +253,7 @@ "        self.min_power = min_power\n", "        self.additional_cost = additional_cost\n", "\n", +        "        # Conversion factor functions will be introduced in section 3.3\n", "        self.conversion_factors = self.get_conversion_factors()\n", "\n", "    # this function is a must be part of any unit class\n", @@ -361,7 +362,7 @@ "        else:\n", "            hydrogen_production = hydrogen_demand\n", "\n", -        "        # get dynamic conversion factor\n", +        "        # Conversion factor functions will be introduced in section 3.3\n", "        dynamic_conversion_factor = self.get_dynamic_conversion_factor(\n", "            hydrogen_production\n", "        )\n", diff --git a/examples/notebooks/04_reinforcement_learning_example.ipynb b/examples/notebooks/04_reinforcement_learning_example.ipynb index 847faf58..d70c78d4 100644 --- a/examples/notebooks/04_reinforcement_learning_example.ipynb +++ b/examples/notebooks/04_reinforcement_learning_example.ipynb @@ -12,7 +12,7 @@ "This tutorial will introduce users into ASSUME and its ways of using reinforcement learning (RL). The main objective of this tutorial is to ensure participants grasp the steps required to equip a new unit with RL strategies or modify the action dimensions.\n", "Our emphasis lies in the bidding strategy, with less focus on the algorithm and role. The latter are usable as a plug-and-play solution in the framework. The following coding tasks will highlight the key aspects to be adjusted, as already outlined in the learning_strategies.py file.\n", "\n", - "The outline of this tutorial is as follows. We will start with a basic summary of the implementation of reinforcement learning (RL) in ASSUME and its architecture (1. ASSUME & Learning Basics) . If you need a refresher on RL in general, please visit our readthedocs (https://ASSUME.readthedocs.io/en/latest/). Afterwards, we install ASSUME in this Google Colab (2. 
Get ASSUME running) and then we dive into the learning_strategies.py file and explain how we need to adjust conventional bidding strategies to incorporate RL (3. Make ASSUME learn).\n", + "The outline of this tutorial is as follows. We will start with a basic summary of the implementation of reinforcement learning (RL) in ASSUME and its architecture (1. ASSUME & Learning Basics). A brief refresher is also given in this exercise. If you need a more thorough refresher on RL in general, please visit our readthedocs ([Reinforcement Learning Overview](https://assume.readthedocs.io/en/latest/learning/) & [Reinforcement Learning Algorithms](https://assume.readthedocs.io/en/latest/learning_algorithm.html)). Afterwards, we install ASSUME in this Google Colab (2. Get ASSUME running) and then we dive into the learning_strategies.py file and explain how we need to adjust conventional bidding strategies to incorporate RL (3. Make ASSUME learn).\n", "\n", "**As a whole, this tutorial covers the following coding tasks:**\n", "\n", @@ -69,11 +69,11 @@ "id": "dDn1blWvPM7Z" }, "source": [ - "Let's focus on the bright yellow part of the architecture, namely the learning algorithm, the actor and the critic. We start with some **reinforcement learning background**. In the current implementation of ASSUME, we model the electricity market as a partially observable Markov game, which is an extension of MDPs for multi-agent setups.\n", + "Let's focus on the bright yellow part of the architecture, namely the learning algorithm, the actor and the critic. We start with some **reinforcement learning background**. In the current implementation of ASSUME, we model the electricity market as a partially observable Markov game, which is an extension of MDPs for multi-agent setups. In the following, a brief summary of the more detailed documentation on [Reinforcement Learning](https://assume.readthedocs.io/en/latest/learning/) is provided.\n", "\n", - "**Multi-agent DRL** is understood as the simultaneous learning of multiple agents interacting in the same environment. The Markov game for $N$ agents consists of a set of states $S$, a set of actions $A_1, ..., A_N$, a set of observations $O_1, ..., O_N$, and a state transition function $P: S \times A_1 \times ... \times A_N \rightarrow \mathcal{P}(S)$ dependent on the state and actions of all agents. After taking action $a_i \in A_i$ in state $s_i \in S$ according to a policy $\pi_i:O_i\rightarrow A_i$, every agent $i$ is transitioned into the new state $s'_i \in S$. Each agent receives a reward $r_i$ according to the individual reward function $R_i$ and a private observation correlated with the state $o_i:S \rightarrow O_i$. Like MDP, each agent $i$ learns an optimal policy $\pi_i^*(s)$ that maximizes its expected reward.\n", + "**Multi-agent DRL** involves multiple agents learning simultaneously while interacting in the same environment. In a Markov game, agents exist in a set of states and can take actions, receive observations, and transition between states. Each agent follows a policy aimed at maximizing its expected reward based on individual reward functions and private observations.\n", "\n", - "To enable multi-agent learning some adjustments are needed within the learning algorithm to get from the TD3 to an MATD3 algorithm. Other authors used similar tweaks to improve the TD3 into the MADDPG algorithm and derive the MA-TD3 algorithm. We'll start explaining the learning by focusing on a single agent and then extend it to multi-agent learning."
+ "To adapt from a single-agent algorithm like TD3 to a multi-agent version (MATD3), certain modifications are necessary. The learning process begins by understanding single-agent learning and then extends to multi-agent scenarios.\n" ] }, { @@ -83,29 +83,15 @@ "source": [ "### Single-Agent Learning\n", "\n", - "We use the actor-critic approach to train the learning agent. The actor-critic approach is a popular RL algorithm that uses two neural networks: an actor network and a critic network. The actor network is responsible for selecting actions, while the critic network evaluates the quality of the actions taken by the actor.\n", + "The approach uses an actor-critic method with two neural networks: an actor network and a critic network. The actor network selects actions, while the critic network evaluates the quality of those actions. Both networks are trained simultaneously through an iterative process.\n", "\n", - "The actor and critic networks are trained simultaneously using the actor-critic algorithm, which updates the weights of both networks at each time step. The actor-critic algorithm is a form of policy iteration, where the policy is updated based on the estimated value function, and the value function is updated based on the.\n", + "**Actor**: \n", + "The actor network is trained using a policy gradient method. It updates its weights to maximize the expected reward by adjusting the probability of selecting specific actions in given states.\n", "\n", - "**Actor**\n", - "The actor network is trained using the policy gradient method, which updates the weights of the actor network in the direction of the gradient of the expected reward with respect to the network parameters:\n", + "**Critic**: \n", + "The critic network is trained using temporal difference (TD) learning. It estimates the value of states by comparing the current state's estimated value with the next state's estimated value, updating its weights based on the difference (TD error).\n", "\n", - "$\\nabla_{\\theta} J(\\theta) = E[\\nabla_{\\theta} log \\pi_{\\theta}(a_t|s_t) * Q^{\\pi}(s_t, a_t)]$\n", - "\n", - "where $J(\\theta)$ is the expected reward, $\\theta$ are the weights of the actor network, $\\pi_{\\theta}(a_t|s_t)$ is the probability of selecting action a_t given state $s_t$, and $Q^{\\pi}(s_t, a_t)$ is the expected reward of taking action $a_t$ in state $s_t$ under policy $\\pi$.\n", - "\n", - "**Critic**\n", - "The critic network is trained using the temporal difference (TD) learning method, which updates the weights of the critic network based on the difference between the estimated value of the current state and the estimated value of the next state:\n", - "\n", - "$\\delta_t = r_t + \\gamma * V(s_{t+1}) - V(s_t)$\n", - "\n", - "where $\\delta_t$ is the TD error, $r_t$ is the reward obtained at time step $t$, $\\gamma$ is the discount factor, $V(s_t)$ is the estimated value of state $s_t$, and $V(s_{t+1})$ is the estimated value of the next state $s_{t+1}$.\n", - "\n", - "The weights of the critic network are updated in the direction of the gradient of the mean squared TD error:\n", - "\n", - "$\\nabla_{\\theta} L = E[(\\delta_t)^2]$\n", - "\n", - "where L is the loss function." + "This approach allows the agent to learn an optimal policy by continuously improving its understanding of which actions lead to the highest rewards in different states." 
] }, { @@ -117,23 +103,14 @@ "source": [ "### Multi-Agent Learning\n", "\n", - "While in a single-agent setup, the state transition and respective reward depend only on the actions of a single agent, the state transitions and rewards depend on the actions of all learning agents in a multi-agent setup. This makes the environment non-stationary for a single agent, which violates the Markov property. Hence, the convergence guarantees of single-agent RL algorithms are no longer valid. Therefore, we utilize the framework of centralized training and decentralized execution and expand upon the MADDPG algorithm. The main idea of this approach is to use a centralized critic during the training phase, which has access to the entire state $\\textbf{S}$, and all actions $a_1, ..., a_N$, thus resolving the issue of non-stationarity, as changes in state transitions and rewards can be explained by the actions of other agents. Meanwhile, during both training and execution, the actor has access only to its local observations $o_i$ derived from the entire state $\\textbf{S}$.\n", + "In a multi-agent setup, state transitions and rewards depend on the actions of all learning agents, unlike single-agent scenarios. This creates a non-stationary environment that violates the Markov property, invalidating traditional single-agent reinforcement learning convergence guarantees.\n", "\n", - "For each agent $i$, we train two centralized critics $Q_{i,θ_1,2}(S, a_1, ..., a_N)$ together with two target critic networks. Similar to TD3, the smaller value of the two critics and target action noise $a_i$,$k~$ is used to calculate the target $y_i,k$:\n", + "To address this challenge, the approach uses a centralized training and decentralized execution framework, expanding on the MADDPG algorithm. During training, a centralized critic has access to the entire state and all agents' actions, which helps explain state transition changes. However, during both training and execution, each agent's actor uses only its local observations.\n", "\n", - "$y_i,k = r_i,k + γ * min_j=1,2 Q_i,θ′_j(S′_k, a_1,k, ..., a_N,k, π′(o_i,k))$\n", + "For each agent, two centralized critics are trained alongside target critic networks to address Overestimation Bias. Similar to TD3, the approach uses the smaller value of two critics and adds target action noise to calculate the target value. The critics are trained using the mean squared Bellman error loss.\n", "\n", - "where $r_i,k$ is the reward obtained by agent $i$ at time step $k$, $γ$ is the discount factor, $S′_k$ is the next state of the environment, and $π′(o_i,k)$ is the target policy of agent $i$.\n", - "\n", - "The critics are trained using the mean squared Bellman error (MSBE) loss:\n", - "\n", - "$L(Q_i,θ_j) = E[(y_i,k - Q_i,θ_j(S_k, a_1,k, ..., a_N,k))^2]$\n", - "\n", - "The actor policy of each agent is updated using the deterministic policy gradient (DPG) algorithm:\n", - "\n", - "$∇_a Q_i,θ_j(S_k, a_1,k, ..., a_N,k, π(o_i,k))|a_i,k=π(o_i,k) * ∇_θ π(o_i,k)$\n", - "\n", - "The actor is updated similarly using only one critic network $Q_{θ1}$. These changes to the original DDPG algorithm allow increased stability and convergence of the TD3 algorithm. This is especially relevant when approaching a multi-agent RL setup, as discussed in the following section." + "The actor policy for each agent is updated using the deterministic policy gradient algorithm. Each actor uses only one critic network to update its policy. 
These modifications to the original DDPG algorithm aim to increase stability and convergence, particularly in multi-agent reinforcement learning scenarios.\n", + "The key innovation is enabling agents to learn effectively in complex, interactive environments by using centralized information during training while maintaining decentralized decision-making during execution.\n" ] }, { @@ -585,7 +562,7 @@ " > unit.forecaster[f\"residual_load_{market_id}\"].index[-1]\n", " ):\n", " scaled_res_load_forecast = (\n", - " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:].values\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:]\n", " / scaling_factor_res_load\n", " )\n", " scaled_res_load_forecast = np.concatenate(\n", @@ -599,16 +576,15 @@ "\n", " else:\n", " scaled_res_load_forecast = (\n", - " unit.forecaster[f\"residual_load_{market_id}\"]\n", - " .loc[start : end_excl + forecast_len]\n", - " .values\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[\n", + " start : end_excl + forecast_len\n", + " ]\n", " / scaling_factor_res_load\n", " )\n", "\n", " if end_excl + forecast_len > unit.forecaster[f\"price_{market_id}\"].index[-1]:\n", " scaled_price_forecast = (\n", - " unit.forecaster[f\"price_{market_id}\"].loc[start:].values\n", - " / scaling_factor_price\n", + " unit.forecaster[f\"price_{market_id}\"].loc[start:] / scaling_factor_price\n", " )\n", " scaled_price_forecast = np.concatenate(\n", " [\n", @@ -621,15 +597,15 @@ "\n", " else:\n", " scaled_price_forecast = (\n", - " unit.forecaster[f\"price_{market_id}\"]\n", - " .loc[start : end_excl + forecast_len]\n", - " .values\n", + " unit.forecaster[f\"price_{market_id}\"].loc[\n", + " start : end_excl + forecast_len\n", + " ]\n", " / scaling_factor_price\n", " )\n", "\n", " # get last accepted bid volume and the current marginal costs of the unit\n", " current_volume = unit.get_output_before(start)\n", - " current_costs = unit.calc_marginal_cost_with_partial_eff(current_volume, start)\n", + " current_costs = unit.calculate_marginal_cost(start, current_volume)\n", "\n", " # scale unit outputs\n", " scaled_total_capacity = current_volume / scaling_factor_total_capacity\n", @@ -886,8 +862,8 @@ " end = product_tuples[0][1]\n", " # get technical bounds for the unit output from the unit\n", " min_power, max_power = unit.calculate_min_max_power(start, end)\n", - " min_power = min_power[start]\n", - " max_power = max_power[start]\n", + " min_power = min_power[0]\n", + " max_power = max_power[0]\n", "\n", " # =============================================================================\n", " # 1. 
Get the Observations, which are the basis of the action decision\n", @@ -904,8 +880,6 @@ " # =============================================================================\n", " actions, noise = self.get_actions(next_observation)\n", "\n", - " bids = actions\n", - "\n", " # =============================================================================\n", " # 3.2 Transform Actions into bids\n", " # =============================================================================\n", @@ -931,6 +905,7 @@ " \"only_hours\": None,\n", " \"price\": bid_price_inflex,\n", " \"volume\": bid_quantity_inflex,\n", + " \"node\": unit.node,\n", " },\n", " {\n", " \"start_time\": start,\n", @@ -938,6 +913,7 @@ " \"only_hours\": None,\n", " \"price\": bid_price_flex,\n", " \"volume\": bid_quantity_flex,\n", + " \"node\": unit.node,\n", " },\n", " ]\n", "\n", @@ -946,10 +922,8 @@ " unit.outputs[\"rl_actions\"].append(actions)\n", "\n", " # store results in unit outputs as series to be written to the database by the unit operator\n", - " unit.outputs[\"actions\"][start] = actions\n", - " unit.outputs[\"exploration_noise\"][start] = noise\n", - "\n", - " bids = self.remove_empty_bids(bids)\n", + " unit.outputs[\"actions\"].at[start] = actions\n", + " unit.outputs[\"exploration_noise\"].at[start] = noise\n", "\n", " return bids" ] @@ -1005,7 +979,7 @@ }, "source": [ "### 3.4 Get a reward\n", - "This step is done in the *calculate_reward*()-function, which is called after the market is cleared and we get the market feedback, so we can calculate the profit. In RL, the design of a reward function is as important as the choice of the correct algorithm. During the initial phase of the work, pure economic reward in the form of the agent's profit was used. Typically, electricity market models consider only a single restart cost. Still, in the case of using RL, the split into shut-down and start-up costs allow the agents to better differentiate between these two events and learn a better policy.\n", + "This step is done in the `calculate_reward()`-function, which is called after the market is cleared and we get the market feedback, so we can calculate the profit. In RL, the design of a reward function is as important as the choice of the correct algorithm. During the initial phase of the work, pure economic reward in the form of the agent's profit was used. Typically, electricity market models consider only a single restart cost. 
Still, in the case of using RL, the split into shut-down and start-up costs allow the agents to better differentiate between these two events and learn a better policy.\n", "\n", "\n", "\\begin{equation}\n", @@ -1083,61 +1057,56 @@ " profit = 0\n", " reward = 0\n", " opportunity_cost = 0\n", + " costs = 0\n", "\n", " # iterate over all orders in the orderbook, to calculate order specific profit\n", " for order in orderbook:\n", " start = order[\"start_time\"]\n", " end = order[\"end_time\"]\n", + " # end includes the end of the last product, to get the last products' start time we deduct the frequency once\n", " end_excl = end - unit.index.freq\n", "\n", " # depending on whether the unit calaculates marginal costs we take costs\n", - " if unit.marginal_cost is not None:\n", - " marginal_cost = (\n", - " unit.marginal_cost[start]\n", - " if len(unit.marginal_cost) > 1\n", - " else unit.marginal_cost\n", - " )\n", - " else:\n", - " marginal_cost = unit.calc_marginal_cost_with_partial_eff(\n", - " power_output=unit.outputs[product_type].loc[start:end_excl],\n", - " timestep=start,\n", - " )\n", + " marginal_cost = unit.calculate_marginal_cost(\n", + " start, unit.outputs[product_type].at[start]\n", + " )\n", "\n", " duration = (end - start) / timedelta(hours=1)\n", "\n", - " # calculate profit as income - running_cost from this event\n", - " price_difference = order[\"accepted_price\"] - marginal_cost\n", - " order_profit = price_difference * order[\"accepted_volume\"] * duration\n", - "\n", - " # calculate opportunity cost\n", - " # as the loss of income we have because we are not running at full power\n", - " order_opportunity_cost = (\n", - " price_difference\n", - " * (\n", - " unit.max_power - unit.outputs[product_type].loc[start:end_excl]\n", - " ).sum()\n", - " * duration\n", - " )\n", "\n", - " # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", - " order_opportunity_cost = max(order_opportunity_cost, 0)\n", + " # calculate profit as income - running_cost from this event\n", + " order_profit = order[\"accepted_price\"] * order[\"accepted_volume\"] * duration\n", + " order_cost = marginal_cost * order[\"accepted_volume\"] * duration\n", "\n", " # collect profit and opportunity cost for all orders\n", - " opportunity_cost += order_opportunity_cost\n", " profit += order_profit\n", + " costs += order_cost\n", + "\n", + " # calculate opportunity cost\n", + " # as the loss of income we have because we are not running at full power\n", + " opportunity_cost = (\n", + " (order[\"accepted_price\"] - marginal_cost)\n", + " * (unit.max_power - unit.outputs[product_type].loc[start:end_excl]).sum()\n", + " * duration\n", + " )\n", + "\n", + " # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", + " opportunity_cost = max(opportunity_cost, 0)\n", "\n", " # consideration of start-up costs, which are evenly divided between the\n", " # upward and downward regulation events\n", " if (\n", - " unit.outputs[product_type].loc[start] != 0\n", + " unit.outputs[product_type].at[start] != 0\n", " and unit.outputs[product_type].loc[start - unit.index.freq] == 0\n", " ):\n", - " profit = profit - unit.hot_start_cost / 2\n", + " costs += unit.hot_start_cost / 2\n", " elif (\n", - " unit.outputs[product_type].loc[start] == 0\n", + " unit.outputs[product_type].at[start] == 0\n", " and unit.outputs[product_type].loc[start - unit.index.freq] != 0\n", " ):\n", - " profit = profit - unit.hot_start_cost / 
2\n", + " costs += unit.hot_start_cost / 2\n", + "\n", + " profit = profit - costs\n", "\n", " # =============================================================================\n", " # =============================================================================\n", @@ -1153,7 +1122,10 @@ " # store results in unit outputs which are written to database by unit operator\n", " unit.outputs[\"profit\"].loc[start:end_excl] += profit\n", " unit.outputs[\"reward\"].loc[start:end_excl] = reward\n", - " unit.outputs[\"regret\"].loc[start:end_excl] = opportunity_cost" + " unit.outputs[\"regret\"].loc[start:end_excl] = regret_scale * opportunity_cost\n", + " unit.outputs[\"total_costs\"].loc[start:end_excl] = costs\n", + "\n", + " unit.outputs[\"rl_rewards\"].append(reward)" ] }, { @@ -1181,7 +1153,7 @@ "R_{i,t} = \\pi_{i,t} + \\beta cm_{i,t}\n", "\\end{equation}\n", "\n", - "Here, $\\beta$ is the regret scaling factor to adjust the ratio between profit-maximizing and regret-minimizing learning.\n", + "Here, $\\beta$ is the regret scaling factor to adjust the ratio between profit-maximizing and regret-minimizing learning. $\\beta = 0.2$ was found to work well empirically.\n", "\n", "The described reward function has proven to perform well even with many agents and to accelerate learning convergence. This is because minimizing the regret term drives the overall system to equilibrium. At a point close to the equilibrium point, the average reward of all agents would converge to a constant value since further policy changes would not lead to an additional reduction in regrets or an increase in profits. Therefore, the average reward value can also be a good indicator of learning performance and convergence." ] @@ -1369,9 +1341,9 @@ " # create world\n", " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategies in the input csv \"pp_learning\"\n", - " # hence we define this strategey to be the one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -1426,9 +1398,9 @@ " # create world\n", " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategeis in the input csv is \"pp_learning\"\n", - " # hence we define this strategey to be one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -1483,9 +1455,9 @@ " # create world\n", " world = 
World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategeis in the input csv is \"pp_learning\"\n", - " # hence we define this strategey to be one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -1875,6 +1847,7 @@ " Create observation\n", " \"\"\"\n", "\n", + " # end includes the end of the last product, to get the last products' start time we deduct the frequency once\n", " end_excl = end - unit.index.freq\n", "\n", " # get the forecast length depending on the time unit considered in the modelled unit\n", @@ -1903,7 +1876,7 @@ " > unit.forecaster[f\"residual_load_{market_id}\"].index[-1]\n", " ):\n", " scaled_res_load_forecast = (\n", - " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:].values\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:]\n", " / scaling_factor_res_load\n", " )\n", " scaled_res_load_forecast = np.concatenate(\n", @@ -1917,16 +1890,15 @@ "\n", " else:\n", " scaled_res_load_forecast = (\n", - " unit.forecaster[f\"residual_load_{market_id}\"]\n", - " .loc[start : end_excl + forecast_len]\n", - " .values\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[\n", + " start : end_excl + forecast_len\n", + " ]\n", " / scaling_factor_res_load\n", " )\n", "\n", " if end_excl + forecast_len > unit.forecaster[f\"price_{market_id}\"].index[-1]:\n", " scaled_price_forecast = (\n", - " unit.forecaster[f\"price_{market_id}\"].loc[start:].values\n", - " / scaling_factor_price\n", + " unit.forecaster[f\"price_{market_id}\"].loc[start:] / scaling_factor_price\n", " )\n", " scaled_price_forecast = np.concatenate(\n", " [\n", @@ -1939,15 +1911,15 @@ "\n", " else:\n", " scaled_price_forecast = (\n", - " unit.forecaster[f\"price_{market_id}\"]\n", - " .loc[start : end_excl + forecast_len]\n", - " .values\n", + " unit.forecaster[f\"price_{market_id}\"].loc[\n", + " start : end_excl + forecast_len\n", + " ]\n", " / scaling_factor_price\n", " )\n", "\n", " # get last accepted bid volume and the current marginal costs of the unit\n", " current_volume = unit.get_output_before(start)\n", - " current_costs = unit.calc_marginal_cost_with_partial_eff(current_volume, start)\n", + " current_costs = unit.calculate_marginal_cost(start, current_volume)\n", "\n", " # scale unit outputs\n", " scaled_total_capacity = current_volume / scaling_factor_total_capacity\n", @@ -2052,8 +2024,8 @@ " end = product_tuples[0][1]\n", " # get technical bounds for the unit output from the unit\n", " min_power, max_power = unit.calculate_min_max_power(start, end)\n", - " min_power = min_power[start]\n", - " max_power = max_power[start]\n", + " min_power = min_power[0]\n", + " max_power = max_power[0]\n", "\n", " # =============================================================================\n", " # 1. 
Get the Observations, which are the basis of the action decision\n", @@ -2070,8 +2042,6 @@ " # =============================================================================\n", " actions, noise = self.get_actions(next_observation)\n", "\n", - " bids = actions\n", - "\n", " # =============================================================================\n", " # 3.2 Transform Actions into bids\n", " # =============================================================================\n", @@ -2099,6 +2069,7 @@ " \"only_hours\": None,\n", " \"price\": bid_price_inflex,\n", " \"volume\": bid_quantity_inflex,\n", + " \"node\": unit.node,\n", " },\n", " {\n", " \"start_time\": start,\n", @@ -2106,6 +2077,7 @@ " \"only_hours\": None,\n", " \"price\": bid_price_flex,\n", " \"volume\": bid_quantity_flex,\n", + " \"node\": unit.node,\n", " },\n", " ]\n", "\n", @@ -2114,14 +2086,11 @@ " unit.outputs[\"rl_actions\"].append(actions)\n", "\n", " # store results in unit outputs as series to be written to the database by the unit operator\n", - " unit.outputs[\"actions\"][start] = actions\n", - " unit.outputs[\"exploration_noise\"][start] = noise\n", - "\n", - " bids = self.remove_empty_bids(bids)\n", + " unit.outputs[\"actions\"].at[start] = actions\n", + " unit.outputs[\"exploration_noise\"].at[start] = noise\n", "\n", " return bids\n", "\n", - "\n", "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", "# this is a workaround to have different methods of the class in different cells\n", "# which is good for the purpose of this tutorial\n", @@ -2150,61 +2119,57 @@ " profit = 0\n", " reward = 0\n", " opportunity_cost = 0\n", + " costs = 0\n", "\n", " # iterate over all orders in the orderbook, to calculate order specific profit\n", " for order in orderbook:\n", " start = order[\"start_time\"]\n", " end = order[\"end_time\"]\n", + " \n", + " # end includes the end of the last product, to get the last products' start time we deduct the frequency once\n", " end_excl = end - unit.index.freq\n", "\n", " # depending on whether the unit calaculates marginal costs we take costs\n", - " if unit.marginal_cost is not None:\n", - " marginal_cost = (\n", - " unit.marginal_cost[start]\n", - " if len(unit.marginal_cost) > 1\n", - " else unit.marginal_cost\n", - " )\n", - " else:\n", - " marginal_cost = unit.calc_marginal_cost_with_partial_eff(\n", - " power_output=unit.outputs[product_type].loc[start:end_excl],\n", - " timestep=start,\n", - " )\n", + " marginal_cost = unit.calculate_marginal_cost(\n", + " start, unit.outputs[product_type].at[start]\n", + " )\n", "\n", " duration = (end - start) / timedelta(hours=1)\n", "\n", - " # calculate profit as income - running_cost from this event\n", - " price_difference = order[\"accepted_price\"] - marginal_cost\n", - " order_profit = price_difference * order[\"accepted_volume\"] * duration\n", - "\n", - " # calculate opportunity cost\n", - " # as the loss of income we have because we are not running at full power\n", - " order_opportunity_cost = (\n", - " price_difference\n", - " * (\n", - " unit.max_power - unit.outputs[product_type].loc[start:end_excl]\n", - " ).sum()\n", - " * duration\n", - " )\n", "\n", - " # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", - " order_opportunity_cost = max(order_opportunity_cost, 0)\n", + " # calculate profit as income - running_cost from this event\n", + " order_profit = order[\"accepted_price\"] * 
order[\"accepted_volume\"] * duration\n", + " order_cost = marginal_cost * order[\"accepted_volume\"] * duration\n", "\n", " # collect profit and opportunity cost for all orders\n", - " opportunity_cost += order_opportunity_cost\n", " profit += order_profit\n", + " costs += order_cost\n", + "\n", + " # calculate opportunity cost\n", + " # as the loss of income we have because we are not running at full power\n", + " opportunity_cost = (\n", + " (order[\"accepted_price\"] - marginal_cost)\n", + " * (unit.max_power - unit.outputs[product_type].loc[start:end_excl]).sum()\n", + " * duration\n", + " )\n", + "\n", + " # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", + " opportunity_cost = max(opportunity_cost, 0)\n", "\n", " # consideration of start-up costs, which are evenly divided between the\n", " # upward and downward regulation events\n", " if (\n", - " unit.outputs[product_type].loc[start] != 0\n", + " unit.outputs[product_type].at[start] != 0\n", " and unit.outputs[product_type].loc[start - unit.index.freq] == 0\n", " ):\n", - " profit = profit - unit.hot_start_cost / 2\n", + " costs += unit.hot_start_cost / 2\n", " elif (\n", - " unit.outputs[product_type].loc[start] == 0\n", + " unit.outputs[product_type].at[start] == 0\n", " and unit.outputs[product_type].loc[start - unit.index.freq] != 0\n", " ):\n", - " profit = profit - unit.hot_start_cost / 2\n", + " costs += unit.hot_start_cost / 2\n", + "\n", + " profit = profit - costs\n", "\n", " # =============================================================================\n", " # =============================================================================\n", @@ -2220,7 +2185,10 @@ " # store results in unit outputs which are written to database by unit operator\n", " unit.outputs[\"profit\"].loc[start:end_excl] += profit\n", " unit.outputs[\"reward\"].loc[start:end_excl] = reward\n", - " unit.outputs[\"regret\"].loc[start:end_excl] = opportunity_cost\n", + " unit.outputs[\"regret\"].loc[start:end_excl] = regret_scale * opportunity_cost\n", + " unit.outputs[\"total_costs\"].loc[start:end_excl] = costs\n", + "\n", + " unit.outputs[\"rl_rewards\"].append(reward)\n", "\n", "\n", "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", @@ -2330,9 +2298,9 @@ " # create world\n", " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategies in the input csv \"pp_learning\"\n", - " # hence we define this strategey to be the one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -2371,9 +2339,9 @@ " # create world\n", " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategeis in the 
input csv is \"pp_learning\"\n", - " # hence we define this strategey to be one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -2412,9 +2380,9 @@ " # create world\n", " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", "\n", - " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", - " # in the example files we provided the name of the learning bidding strategeis in the input csv is \"pp_learning\"\n", - " # hence we define this strategey to be one of the learning class\n", + " # we import our defined bidding strategy class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv in \"pp_learning\"\n", + " # hence we define this strategy to be the one of the learning class\n", " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", "\n", " # then we load the scenario specified above from the respective input files\n", @@ -2557,7 +2525,7 @@ ], "metadata": { "kernelspec": { - "display_name": "assume-framework", + "display_name": "assume", "language": "python", "name": "python3" }, diff --git a/examples/notebooks/09_example_Sim_and_xRL.ipynb b/examples/notebooks/09_example_Sim_and_xRL.ipynb index 3b4de359..dfa41a09 100644 --- a/examples/notebooks/09_example_Sim_and_xRL.ipynb +++ b/examples/notebooks/09_example_Sim_and_xRL.ipynb @@ -1042,7 +1042,7 @@ "id": "ddfe95d9" }, "source": [ - "We define a utility function to load observations and input data from a specified path. Analyzing the shap values for all observations and all parameters would make this notebook quite lengthy, so we’re filtering the observation data frame to include only 700 observations." + "We define a utility function to load observations and input data from a specified path." ] }, { @@ -1061,8 +1061,6 @@ " # Load observations\n", " obs_path = f\"{path}/buffer_obs.json\"\n", "\n", - " print(obs_path)\n", - "\n", " with open(obs_path) as file:\n", " json_data = json.load(file)\n", "\n", @@ -1070,14 +1068,9 @@ " input_data = np.array(json_data)\n", " input_data = np.squeeze(input_data)\n", "\n", - " print(len(input_data))\n", " # filter out arrays where all value are 0\n", " input_data = input_data[~np.all(input_data == 0, axis=1)]\n", "\n", - " print(len(input_data))\n", - " # filter only first 700 observations\n", - " input_data = input_data[:300]\n", - "\n", " return pd.DataFrame(input_data, columns=feature_names), input_data" ] }, @@ -1458,7 +1451,7 @@ "notebook_metadata_filter": "-all" }, "kernelspec": { - "display_name": "assume-framework", + "display_name": "assume", "language": "python", "name": "python3" }, @@ -1472,7 +1465,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.7" } }, "nbformat": 4,