diff --git a/examples/notebooks/04_reinforcement_learning_example.ipynb b/examples/notebooks/04_reinforcement_learning_example.ipynb index 41c88a71..2b213b3e 100644 --- a/examples/notebooks/04_reinforcement_learning_example.ipynb +++ b/examples/notebooks/04_reinforcement_learning_example.ipynb @@ -300,6 +300,7 @@ "source": [ "import logging\n", "import os\n", + "import yaml\n", "from datetime import datetime, timedelta\n", "from pathlib import Path\n", "\n", @@ -564,18 +565,18 @@ " # =============================================================================\n", " # 1.1 Get the Observations, which are the basis of the action decision\n", " # =============================================================================\n", + " \n", " # residual load forecast\n", - " # residual load forecast\n", - " scaling_factor_res_load = self.max_demand\n", + " scaling_factor_res_load = None #TODO\n", "\n", " # price forecast\n", - " scaling_factor_price = self.max_bid_price\n", + " scaling_factor_price = None #TODO\n", "\n", " # total capacity\n", - " scaling_factor_total_capacity = unit.max_power\n", + " scaling_factor_total_capacity = None #TODO\n", "\n", " # marginal cost\n", - " scaling_factor_marginal_cost = self.max_bid_price\n", + " scaling_factor_marginal_cost = None #TODO\n", "\n", " # checks if we are at the end of the simulation horizon, since we need to change the forecast then\n", " # for residual load and price forecast and scale them\n", @@ -775,7 +776,7 @@ " # =============================================================================\n", " # ==> YOUR CODE HERE\n", " base_bid = None # TODO\n", - " # add niose to the last dimension of the observation\n", + " # add noise to the last dimension of the observation\n", " # needs to be adjusted if observation space is changed, because only makes sense\n", " # if the last dimension of the observation space are the marginal cost\n", " curr_action = noise + base_bid.clone().detach()\n", @@ -1287,7 +1288,7 @@ "source": [ "learning_config = {\n", " \"continue_learning\": False,\n", - " \"trained_policies_save_path\": \"null\",\n", + " \"trained_policies_save_path\": None,\n", " \"max_bid_price\": 100,\n", " \"algorithm\": \"matd3\",\n", " \"learning_rate\": 0.001,\n", @@ -1305,6 +1306,26 @@ "}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bac01731", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)" + ] + }, { "cell_type": "markdown", "id": "132f9429", @@ -1662,6 +1683,866 @@ "lines_to_next_cell": 2 }, "outputs": [], + "source": [ + "# @title Complete notebook code with tasks already filled in\n", + "\n", + "# this cell is used to display the image in the notebook when using colab\n", + "# or running the notebook locally\n", + "\n", + "import os\n", + "\n", + "import importlib.util\n", + "\n", + "# Check if 'google.colab' is available\n", + "IN_COLAB = importlib.util.find_spec(\"google.colab\") is not None\n", + "\n", + "if IN_COLAB:\n", + " !pip install 'assume-framework[learning]'\n", + " # Colab currently has issues with pyomo version 6.8.2, causing the 
notebook to crash\n", + "    # Installing an older version resolves this issue. This should only be considered a temporary fix.\n", + "    !pip install pyomo==6.8.0\n", + "    !git clone --depth=1 https://github.com/assume-framework/assume.git assume-repo\n", + "    !cd assume-repo && assume -s example_01b -db \"sqlite:///./examples/local_db/assume_db_example_01b.db\"\n", + "\n", + "colab_inputs_path = \"assume-repo/examples/inputs\"\n", + "local_inputs_path = \"../inputs\"\n", + "\n", + "inputs_path = colab_inputs_path if IN_COLAB else local_inputs_path\n", + "\n", + "import logging\n", + "import os\n", + "import yaml\n", + "from datetime import datetime, timedelta\n", + "from pathlib import Path\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch as th\n", + "\n", + "from assume import World\n", + "from assume.common.base import LearningStrategy, SupportsMinMax\n", + "from assume.common.market_objects import MarketConfig, Orderbook, Product\n", + "from assume.reinforcement_learning.algorithms import actor_architecture_aliases\n", + "from assume.reinforcement_learning.learning_utils import NormalActionNoise\n", + "from assume.scenario.loader_csv import load_scenario_folder, run_learning\n", + "\n", + "class RLStrategy(LearningStrategy):\n", + "    \"\"\"\n", + "    Reinforcement Learning Strategy\n", + "    \"\"\"\n", + "\n", + "    def __init__(self, *args, **kwargs):\n", + "        super().__init__(obs_dim=50, act_dim=2, unique_obs_dim=2, *args, **kwargs)\n", + "\n", + "        self.unit_id = kwargs[\"unit_id\"]\n", + "\n", + "        # defines bounds of actions space\n", + "        self.max_bid_price = kwargs.get(\"max_bid_price\", 100)\n", + "        self.max_demand = kwargs.get(\"max_demand\", 10e3)\n", + "\n", + "        # tells us whether we are training the agents or just executing pre-learned strategies\n", + "        self.learning_mode = kwargs.get(\"learning_mode\", False)\n", + "        self.perform_evaluation = kwargs.get(\"perform_evaluation\", False)\n", + "\n", + "        # based on learning config define algorithm configuration\n", + "        self.algorithm = kwargs.get(\"algorithm\", \"matd3\")\n", + "        actor_architecture = kwargs.get(\"actor_architecture\", \"mlp\")\n", + "\n", + "        # define the architecture of the actor neural network\n", + "        # if you use many time series inputs you might want to use the LSTM instead of the MLP, for example\n", + "        if actor_architecture in actor_architecture_aliases.keys():\n", + "            self.actor_architecture_class = actor_architecture_aliases[\n", + "                actor_architecture\n", + "            ]\n", + "        else:\n", + "            raise ValueError(\n", + "                f\"Policy '{actor_architecture}' unknown. 
Supported architectures are {list(actor_architecture_aliases.keys())}\"\n", + "            )\n", + "\n", + "        # sets the device of the actor network\n", + "        device = kwargs.get(\"device\", \"cpu\")\n", + "        self.device = th.device(device if th.cuda.is_available() else \"cpu\")\n", + "        if not self.learning_mode:\n", + "            self.device = th.device(\"cpu\")\n", + "\n", + "        # future: add option to choose between float16 and float32\n", + "        # float_type = kwargs.get(\"float_type\", \"float32\")\n", + "        self.float_type = th.float\n", + "\n", + "        # for definition of observation space\n", + "        self.foresight = kwargs.get(\"foresight\", 24)\n", + "\n", + "        if self.learning_mode:\n", + "            self.learning_role = None\n", + "            self.collect_initial_experience_mode = kwargs.get(\n", + "                \"episodes_collecting_initial_experience\", True\n", + "            )\n", + "\n", + "            self.action_noise = NormalActionNoise(\n", + "                mu=0.0,\n", + "                sigma=kwargs.get(\"noise_sigma\", 0.1),\n", + "                action_dimension=self.act_dim,\n", + "                scale=kwargs.get(\"noise_scale\", 1.0),\n", + "                dt=kwargs.get(\"noise_dt\", 1.0),\n", + "            )\n", + "\n", + "        elif Path(kwargs[\"trained_policies_save_path\"]).is_dir():\n", + "            self.load_actor_params(load_path=kwargs[\"trained_policies_save_path\"])\n", + "\n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + "    def calculate_bids(\n", + "        self,\n", + "        unit: SupportsMinMax,\n", + "        market_config: MarketConfig,\n", + "        product_tuples: list[Product],\n", + "        **kwargs,\n", + "    ) -> Orderbook:\n", + "        \"\"\"\n", + "        Calculate bids for a unit -> STEP 1 & 2\n", + "        \"\"\"\n", + "\n", + "        start = product_tuples[0][0]\n", + "        end = product_tuples[0][1]\n", + "        # get technical bounds for the unit output from the unit\n", + "        min_power, max_power = unit.calculate_min_max_power(start, end)\n", + "        min_power = min_power[start]\n", + "        max_power = max_power[start]\n", + "\n", + "        # =============================================================================\n", + "        # 1. Get the Observations, which are the basis of the action decision\n", + "        # =============================================================================\n", + "        next_observation = self.create_observation(\n", + "            unit=unit,\n", + "            market_id=market_config.market_id,\n", + "            start=start,\n", + "            end=end,\n", + "        )\n", + "\n", + "        # =============================================================================\n", + "        # 2. 
Get the Actions, based on the observations\n", + " # =============================================================================\n", + " actions, noise = self.get_actions(next_observation)\n", + "\n", + " bids = actions\n", + "\n", + " bids = self.remove_empty_bids(bids)\n", + "\n", + " return bids\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_reward(\n", + " self,\n", + " unit,\n", + " marketconfig: MarketConfig,\n", + " orderbook: Orderbook,\n", + " ):\n", + " \"\"\"\n", + " Calculate reward\n", + " \"\"\"\n", + "\n", + " return None\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def create_observation(\n", + " self,\n", + " unit: SupportsMinMax,\n", + " market_id: str,\n", + " start: datetime,\n", + " end: datetime,\n", + " ):\n", + " \"\"\"\n", + " Create observation\n", + " \"\"\"\n", + "\n", + " end_excl = end - unit.index.freq\n", + "\n", + " # get the forecast length depending on the time unit considered in the modelled unit\n", + " forecast_len = pd.Timedelta((self.foresight - 1) * unit.index.freq)\n", + "\n", + " # =============================================================================\n", + " # 1.1 Get the Observations, which are the basis of the action decision\n", + " # =============================================================================\n", + " \n", + " # residual load forecast\n", + " scaling_factor_res_load = self.max_demand\n", + "\n", + " # price forecast\n", + " scaling_factor_price = self.max_bid_price\n", + "\n", + " # total capacity\n", + " scaling_factor_total_capacity = unit.max_power\n", + "\n", + " # marginal cost\n", + " scaling_factor_marginal_cost = self.max_bid_price\n", + "\n", + " # checks if we are at the end of the simulation horizon, since we need to change the forecast then\n", + " # for residual load and price forecast and scale them\n", + " if (\n", + " end_excl + forecast_len\n", + " > unit.forecaster[f\"residual_load_{market_id}\"].index[-1]\n", + " ):\n", + " scaled_res_load_forecast = (\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:].values\n", + " / scaling_factor_res_load\n", + " )\n", + " scaled_res_load_forecast = np.concatenate(\n", + " [\n", + " scaled_res_load_forecast,\n", + " unit.forecaster[f\"residual_load_{market_id}\"].iloc[\n", + " : self.foresight - len(scaled_res_load_forecast)\n", + " ],\n", + " ]\n", + " )\n", + "\n", + " else:\n", + " scaled_res_load_forecast = (\n", + " unit.forecaster[f\"residual_load_{market_id}\"]\n", + " .loc[start : end_excl + forecast_len]\n", + " .values\n", + " / scaling_factor_res_load\n", + " )\n", + "\n", + " if end_excl + forecast_len > unit.forecaster[f\"price_{market_id}\"].index[-1]:\n", + " scaled_price_forecast = (\n", + " 
unit.forecaster[f\"price_{market_id}\"].loc[start:].values\n", + " / scaling_factor_price\n", + " )\n", + " scaled_price_forecast = np.concatenate(\n", + " [\n", + " scaled_price_forecast,\n", + " unit.forecaster[f\"price_{market_id}\"].iloc[\n", + " : self.foresight - len(scaled_price_forecast)\n", + " ],\n", + " ]\n", + " )\n", + "\n", + " else:\n", + " scaled_price_forecast = (\n", + " unit.forecaster[f\"price_{market_id}\"]\n", + " .loc[start : end_excl + forecast_len]\n", + " .values\n", + " / scaling_factor_price\n", + " )\n", + "\n", + " # get last accepted bid volume and the current marginal costs of the unit\n", + " current_volume = unit.get_output_before(start)\n", + " current_costs = unit.calc_marginal_cost_with_partial_eff(current_volume, start)\n", + "\n", + " # scale unit outputs\n", + " scaled_total_capacity = current_volume / scaling_factor_total_capacity\n", + " scaled_marginal_cost = current_costs / scaling_factor_marginal_cost\n", + "\n", + " # concat all obsverations into one array\n", + " observation = np.concatenate(\n", + " [\n", + " scaled_res_load_forecast,\n", + " scaled_price_forecast,\n", + " np.array([scaled_total_capacity, scaled_marginal_cost]),\n", + " ]\n", + " )\n", + "\n", + " # transfer array to GPU for NN processing\n", + " observation = (\n", + " th.tensor(observation, dtype=self.float_type)\n", + " .to(self.device, non_blocking=True)\n", + " .view(-1)\n", + " )\n", + "\n", + " return observation.detach().clone()\n", + "\n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def get_actions(self, next_observation):\n", + " \"\"\"\n", + " Get actions\n", + " \"\"\"\n", + "\n", + " # distinction whether we are in learning mode or not to handle exploration realised with noise\n", + " if self.learning_mode:\n", + " # if we are in learning mode, the first x episodes we want to explore the entire action space\n", + " # to get a good initial experience in the area around the costs of the agent\n", + " if self.collect_initial_experience_mode:\n", + " # define current action as solely noise\n", + " noise = (\n", + " th.normal(\n", + " mean=0.0, std=0.2, size=(1, self.act_dim), dtype=self.float_type\n", + " )\n", + " .to(self.device)\n", + " .squeeze()\n", + " )\n", + "\n", + " # =============================================================================\n", + " # 2.1 Get Actions and handle exploration\n", + " # =============================================================================\n", + " # ==> YOUR CODE HERE\n", + " base_bid = next_observation[-1] # = marginal_costs\n", + " # add noise to the last dimension of the observation\n", + " # needs to be adjusted if observation space is changed, because only makes sense\n", + " # if the last dimension of the observation space are the marginal cost\n", + " curr_action = noise + base_bid.clone().detach()\n", + "\n", + " else:\n", + " # if we are not in the initial exploration phase we chose the action with the actor neuronal net\n", + " # and add noise to the action\n", + " curr_action = self.actor(next_observation).detach()\n", + " noise = th.tensor(\n", + " self.action_noise.noise(), device=self.device, dtype=self.float_type\n", + " 
)\n", + " curr_action += noise\n", + " else:\n", + " # if we are not in learning mode we just use the actor neuronal net to get the action without adding noise\n", + "\n", + " curr_action = self.actor(next_observation).detach()\n", + " noise = tuple(0 for _ in range(self.act_dim))\n", + "\n", + " curr_action = curr_action.clamp(-1, 1)\n", + "\n", + " return curr_action, noise\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_bids(\n", + " self,\n", + " unit: SupportsMinMax,\n", + " market_config: MarketConfig,\n", + " product_tuples: list[Product],\n", + " **kwargs,\n", + " ) -> Orderbook:\n", + " \"\"\"\n", + " Calculate bids for a unit\n", + " \"\"\"\n", + "\n", + " bid_quantity_inflex, bid_price_inflex = 0, 0\n", + " bid_quantity_flex, bid_price_flex = 0, 0\n", + "\n", + " start = product_tuples[0][0]\n", + " end = product_tuples[0][1]\n", + " # get technical bounds for the unit output from the unit\n", + " min_power, max_power = unit.calculate_min_max_power(start, end)\n", + " min_power = min_power[start]\n", + " max_power = max_power[start]\n", + "\n", + " # =============================================================================\n", + " # 1. Get the Observations, which are the basis of the action decision\n", + " # =============================================================================\n", + " next_observation = self.create_observation(\n", + " unit=unit,\n", + " market_id=market_config.market_id,\n", + " start=start,\n", + " end=end,\n", + " )\n", + "\n", + " # =============================================================================\n", + " # 2. 
Get the Actions, based on the observations\n", + " # =============================================================================\n", + " actions, noise = self.get_actions(next_observation)\n", + "\n", + " bids = actions\n", + "\n", + " # =============================================================================\n", + " # 3.2 Transform Actions into bids\n", + " # =============================================================================\n", + " # ==> YOUR CODE HERE\n", + " # actions are in the range [0,1], we need to transform them into actual bids\n", + " # we can use our domain knowledge to guide the bid formulation\n", + "\n", + " #calculate actual bids\n", + " #rescale actions to actual prices\n", + " bid_prices = actions * self.max_bid_price\n", + "\n", + " #calculate inflexible part of the bid\n", + " bid_quantity_inflex = min_power\n", + " bid_price_inflex = min(bid_prices)\n", + "\n", + " #calculate flexible part of the bid\n", + " bid_quantity_flex = max_power - bid_quantity_inflex\n", + " bid_price_flex = max(bid_prices)\n", + "\n", + " # actually formulate bids in orderbook format\n", + " bids = [\n", + " {\n", + " \"start_time\": start,\n", + " \"end_time\": end,\n", + " \"only_hours\": None,\n", + " \"price\": bid_price_inflex,\n", + " \"volume\": bid_quantity_inflex,\n", + " },\n", + " {\n", + " \"start_time\": start,\n", + " \"end_time\": end,\n", + " \"only_hours\": None,\n", + " \"price\": bid_price_flex,\n", + " \"volume\": bid_quantity_flex,\n", + " },\n", + " ]\n", + "\n", + " # store results in unit outputs as lists to be written to the buffer for learning\n", + " unit.outputs[\"rl_observations\"].append(next_observation)\n", + " unit.outputs[\"rl_actions\"].append(actions)\n", + "\n", + " # store results in unit outputs as series to be written to the database by the unit operator\n", + " unit.outputs[\"actions\"][start] = actions\n", + " unit.outputs[\"exploration_noise\"][start] = noise\n", + "\n", + " bids = self.remove_empty_bids(bids)\n", + "\n", + " return bids\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_reward(\n", + " self,\n", + " unit,\n", + " marketconfig: MarketConfig,\n", + " orderbook: Orderbook,\n", + " ):\n", + " \"\"\"\n", + " Calculate reward\n", + " \"\"\"\n", + "\n", + " # =============================================================================\n", + " # 3. 
Calculate Reward\n", + "        # =============================================================================\n", + "        # function is called after the market is cleared and we get the market feedback,\n", + "        # so we can calculate the profit\n", + "\n", + "        product_type = marketconfig.product_type\n", + "\n", + "        profit = 0\n", + "        reward = 0\n", + "        opportunity_cost = 0\n", + "\n", + "        # iterate over all orders in the orderbook, to calculate order specific profit\n", + "        for order in orderbook:\n", + "            start = order[\"start_time\"]\n", + "            end = order[\"end_time\"]\n", + "            end_excl = end - unit.index.freq\n", + "\n", + "            # depending on whether the unit calculates marginal costs we take costs\n", + "            if unit.marginal_cost is not None:\n", + "                marginal_cost = (\n", + "                    unit.marginal_cost[start]\n", + "                    if len(unit.marginal_cost) > 1\n", + "                    else unit.marginal_cost\n", + "                )\n", + "            else:\n", + "                marginal_cost = unit.calc_marginal_cost_with_partial_eff(\n", + "                    power_output=unit.outputs[product_type].loc[start:end_excl],\n", + "                    timestep=start,\n", + "                )\n", + "\n", + "            duration = (end - start) / timedelta(hours=1)\n", + "\n", + "            # calculate profit as income - running_cost from this event\n", + "            price_difference = order[\"accepted_price\"] - marginal_cost\n", + "            order_profit = price_difference * order[\"accepted_volume\"] * duration\n", + "\n", + "            # calculate opportunity cost\n", + "            # as the loss of income we have because we are not running at full power\n", + "            order_opportunity_cost = (\n", + "                price_difference\n", + "                * (\n", + "                    unit.max_power - unit.outputs[product_type].loc[start:end_excl]\n", + "                ).sum()\n", + "                * duration\n", + "            )\n", + "\n", + "            # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", + "            order_opportunity_cost = max(order_opportunity_cost, 0)\n", + "\n", + "            # collect profit and opportunity cost for all orders\n", + "            opportunity_cost += order_opportunity_cost\n", + "            profit += order_profit\n", + "\n", + "        # consideration of start-up costs, which are evenly divided between the\n", + "        # upward and downward regulation events\n", + "        if (\n", + "            unit.outputs[product_type].loc[start] != 0\n", + "            and unit.outputs[product_type].loc[start - unit.index.freq] == 0\n", + "        ):\n", + "            profit = profit - unit.hot_start_cost / 2\n", + "        elif (\n", + "            unit.outputs[product_type].loc[start] == 0\n", + "            and unit.outputs[product_type].loc[start - unit.index.freq] != 0\n", + "        ):\n", + "            profit = profit - unit.hot_start_cost / 2\n", + "\n", + "        # =============================================================================\n", + "        # =============================================================================\n", + "        # ==> YOUR CODE HERE\n", + "        # The straightforward implementation would be reward = profit, yet we would like to give the agent more guidance\n", + "        # in the learning process, so we add a regret term to the reward, which is the opportunity cost\n", + "        # define the reward and scale it\n", + "\n", + "        scaling = 0.1 / unit.max_power\n", + "        regret_scale = 0.2\n", + "        reward = float(profit - regret_scale * opportunity_cost) * scaling\n", + "\n", + "        # store results in unit outputs which are written to database by unit operator\n", + "        unit.outputs[\"profit\"].loc[start:end_excl] += profit\n", + "        unit.outputs[\"reward\"].loc[start:end_excl] = reward\n", + "        unit.outputs[\"regret\"].loc[start:end_excl] = opportunity_cost\n", + "\n", + "# we define the class again and inherit from the initial class just to add the 
additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def load_actor_params(self, load_path):\n", + " \"\"\"\n", + " Load actor parameters\n", + " \"\"\"\n", + " directory = f\"{load_path}/actors/actor_{self.unit_id}.pt\"\n", + "\n", + " params = th.load(directory, map_location=self.device)\n", + "\n", + " self.actor = self.actor_architecture_class(\n", + " obs_dim=self.obs_dim,\n", + " act_dim=self.act_dim,\n", + " float_type=self.float_type,\n", + " unique_obs_dim=self.unique_obs_dim,\n", + " num_timeseries_obs_dim=self.num_timeseries_obs_dim,\n", + " ).to(self.device)\n", + "\n", + " self.actor.load_state_dict(params[\"actor\"])\n", + "\n", + " if self.learning_mode:\n", + " self.actor_target = self.actor_architecture_class(\n", + " obs_dim=self.obs_dim,\n", + " act_dim=self.act_dim,\n", + " float_type=self.float_type,\n", + " unique_obs_dim=self.unique_obs_dim,\n", + " num_timeseries_obs_dim=self.num_timeseries_obs_dim,\n", + " ).to(self.device)\n", + " self.actor_target.load_state_dict(params[\"actor_target\"])\n", + " self.actor_target.eval()\n", + " self.actor.optimizer.load_state_dict(params[\"actor_optimizer\"])\n", + "\n", + "learning_config = {\n", + " \"continue_learning\": False,\n", + " \"trained_policies_save_path\": None,\n", + " \"max_bid_price\": 100,\n", + " \"algorithm\": \"matd3\",\n", + " \"learning_rate\": 0.001,\n", + " \"training_episodes\": 2,\n", + " \"episodes_collecting_initial_experience\": 1,\n", + " \"train_freq\": \"24h\",\n", + " \"gradient_steps\": -1,\n", + " \"batch_size\": 256,\n", + " \"gamma\": 0.99,\n", + " \"device\": \"cpu\",\n", + " \"noise_sigma\": 0.1,\n", + " \"noise_scale\": 1,\n", + " \"noise_dt\": 1,\n", + " \"validation_episodes_interval\": 5,\n", + "}\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02b/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02b/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02c/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02c/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "csv_path = 
\"outputs\"\n", + "os.makedirs(\"local_db\", exist_ok=True)\n", + "\n", + "if __name__ == \"__main__\":\n", + " db_uri = \"sqlite:///local_db/assume_db.db\"\n", + "\n", + " scenario = \"example_02a\"\n", + " study_case = \"base\"\n", + "\n", + " # create world\n", + " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", + "\n", + " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategies in the input csv \"pp_learning\"\n", + " # hence we define this strategey to be the one of the learning class\n", + " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", + "\n", + " # then we load the scenario specified above from the respective input files\n", + " load_scenario_folder(\n", + " world,\n", + " inputs_path=inputs_path,\n", + " scenario=scenario,\n", + " study_case=study_case,\n", + " )\n", + "\n", + " # run learning if learning mode is enabled\n", + " # needed as we simulate the modelling horizon multiple times to train reinforcement learning run_learning( world, inputs_path=input_path, scenario=scenario, study_case=study_case, )\n", + "\n", + " if world.learning_config.get(\"learning_mode\", False):\n", + " run_learning(\n", + " world,\n", + " inputs_path=inputs_path,\n", + " scenario=scenario,\n", + " study_case=study_case,\n", + " )\n", + "\n", + " # after the learning is done we make a normal run of the simulation, which equals a test run\n", + " world.run()\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "csv_path = \"outputs\"\n", + "os.makedirs(\"local_db\", exist_ok=True)\n", + "\n", + "if __name__ == \"__main__\":\n", + " db_uri = \"sqlite:///local_db/assume_db.db\"\n", + "\n", + " scenario = \"example_02b\"\n", + " study_case = \"base\"\n", + "\n", + " # create world\n", + " world = World(database_uri=db_uri, export_csv_path=csv_path)\n", + "\n", + " # we import our defined bidding strategey class including the learning into the world bidding strategies\n", + " # in the example files we provided the name of the learning bidding strategeis in the input csv is \"pp_learning\"\n", + " # hence we define this strategey to be one of the learning class\n", + " world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", + "\n", + " # then we load the scenario specified above from the respective input files\n", + " load_scenario_folder(\n", + " world,\n", + " inputs_path=inputs_path,\n", + " scenario=scenario,\n", + " study_case=study_case,\n", + " )\n", + "\n", + " # run learning if learning mode is enabled\n", + " # needed as we simulate the modelling horizon multiple times to train reinforcement learning run_learning( world, inputs_path=input_path, scenario=scenario, study_case=study_case, )\n", + "\n", + " if world.learning_config.get(\"learning_mode\", False):\n", + " run_learning(\n", + " world,\n", + " inputs_path=inputs_path,\n", + " scenario=scenario,\n", + " study_case=study_case,\n", + " )\n", + "\n", + " # after the learning is done we make a normal run of the simulation, which equals a test run\n", + " world.run()\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "csv_path = \"outputs\"\n", + "os.makedirs(\"local_db\", exist_ok=True)\n", + "\n", + "if __name__ == \"__main__\":\n", + " db_uri = \"sqlite:///local_db/assume_db.db\"\n", + "\n", + " scenario = \"example_02c\"\n", + " study_case = \"base\"\n", + "\n", + " # create world\n", + " world = World(database_uri=db_uri, 
export_csv_path=csv_path)\n", + "\n", + "    # we add our defined learning bidding strategy class to the world's bidding strategies\n", + "    # in the example files provided, the name of the learning bidding strategy in the input csv is \"pp_learning\"\n", + "    # hence we map this strategy name to our learning strategy class\n", + "    world.bidding_strategies[\"pp_learning\"] = RLStrategy\n", + "\n", + "    # then we load the scenario specified above from the respective input files\n", + "    load_scenario_folder(\n", + "        world,\n", + "        inputs_path=inputs_path,\n", + "        scenario=scenario,\n", + "        study_case=study_case,\n", + "    )\n", + "\n", + "    # run learning if learning mode is enabled\n", + "    # needed as we simulate the modelling horizon multiple times to train the reinforcement learning agents\n", + "\n", + "    if world.learning_config.get(\"learning_mode\", False):\n", + "        run_learning(\n", + "            world,\n", + "            inputs_path=inputs_path,\n", + "            scenario=scenario,\n", + "            study_case=study_case,\n", + "        )\n", + "\n", + "    # after the learning is done we make a normal run of the simulation, which equals a test run\n", + "    world.run()\n", + "\n", + "!pip install matplotlib\n", + "\n", + "import os\n", + "from functools import partial\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from sqlalchemy import create_engine\n", + "\n", + "os.makedirs(\"outputs\", exist_ok=True)\n", + "\n", + "db_uri = \"sqlite:///local_db/assume_db.db\"\n", + "\n", + "engine = create_engine(db_uri)\n", + "\n", + "\n", + "sql = \"\"\"\n", + "SELECT ident, simulation,\n", + "sum(round(CAST(value AS numeric), 2)) FILTER (WHERE variable = 'total_cost') as total_cost,\n", + "sum(round(CAST(value AS numeric), 2)*1000) FILTER (WHERE variable = 'total_volume') as total_volume,\n", + "sum(round(CAST(value AS numeric), 2)) FILTER (WHERE variable = 'avg_price') as average_cost\n", + "FROM kpis\n", + "where variable in ('total_cost', 'total_volume', 'avg_price')\n", + "and simulation in ('example_02a_base', 'example_02b_base', 'example_02c_base')\n", + "group by simulation, ident ORDER BY simulation\n", + "\"\"\"\n", + "\n", + "\n", + "kpis = pd.read_sql(sql, engine)\n", + "\n", + "# sort the dataframe so the simulations appear in a consistent order\n", + "kpis = kpis.sort_values(\n", + "    by=\"simulation\",\n", + "    # key=lambda x: x.map({\"example_02a\": 1, \"example_02b\": 2, \"example_02c\": 3}),\n", + ")\n", + "\n", + "\n", + "kpis[\"total_volume\"] /= 1e9\n", + "kpis[\"total_cost\"] /= 1e6\n", + "savefig = partial(plt.savefig, transparent=False, bbox_inches=\"tight\")\n", + "\n", + "xticks = kpis[\"simulation\"].unique()\n", + "plt.style.use(\"seaborn-v0_8\")\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n", + "\n", + "ax2 = ax.twinx()  # Create another axes that shares the same x-axis as ax.\n", + "\n", + "width = 0.4\n", + "\n", + "kpis.total_volume.plot(kind=\"bar\", ax=ax, width=width, position=1, color=\"royalblue\")\n", + "kpis.total_cost.plot(kind=\"bar\", ax=ax2, width=width, position=0, color=\"green\")\n", + "\n", + "# set x-axis limits\n", + "ax.set_xlim(-0.6, len(kpis[\"simulation\"]) - 0.4)\n", + "\n", + "# set y-axis limits\n", + "ax.set_ylim(0, max(kpis.total_volume) * 1.1 + 0.1)\n", + "ax2.set_ylim(0, max(kpis.total_cost) * 1.1 + 0.1)\n", + "\n", + "ax.set_ylabel(\"Total Volume (GWh)\")\n", + "ax2.set_ylabel(\"Total Cost (M€)\")\n", + "\n", + 
"ax.set_xticklabels(xticks, rotation=45)\n", + "ax.set_xlabel(\"Simulation\")\n", + "\n", + "ax.legend([\"Total Volume\"], loc=\"upper left\")\n", + "ax2.legend([\"Total Cost\"], loc=\"upper right\")\n", + "\n", + "plt.title(\"Total Volume and Total Cost for each Simulation\")\n", + "\n", + "sql = \"\"\"\n", + "SELECT\n", + " product_start AS \"time\",\n", + " price AS \"Price\",\n", + " simulation AS \"simulation\",\n", + " node\n", + "FROM market_meta\n", + "WHERE simulation in ('example_02a_base', 'example_02b_base', 'example_02c_base') AND market_id in ('EOM') \n", + "GROUP BY market_id, simulation, product_start, price, node\n", + "ORDER BY product_start, node\n", + "\n", + "\"\"\"\n", + "\n", + "df = pd.read_sql(sql, engine)\n", + "\n", + "df\n", + "\n", + "# Convert the 'time' column to datetime\n", + "df[\"time\"] = pd.to_datetime(df[\"time\"])\n", + "\n", + "# Plot the data\n", + "plt.figure(figsize=(14, 7))\n", + "# Loop through each simulation and plot\n", + "for simulation in df[\"simulation\"].unique():\n", + " subset = df[df[\"simulation\"] == simulation]\n", + " plt.plot(subset[\"time\"], subset[\"Price\"], label=simulation)\n", + "\n", + "plt.title(\"Price over Time for Different Simulations\")\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Price\")\n", + "plt.legend(title=\"Simulation\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97f4d181", + "metadata": {}, + "outputs": [], "source": [] } ],