From 2bd2ca77a4ddd3f0b1625ac7bda988e2295976d0 Mon Sep 17 00:00:00 2001 From: AndreasEppler Date: Wed, 20 Nov 2024 18:01:15 +0100 Subject: [PATCH] Added final cell with all code for execution without pasting the quiz solutions. --- .../04_reinforcement_learning_example.ipynb | 895 +++++++++++++++++- 1 file changed, 888 insertions(+), 7 deletions(-) diff --git a/examples/notebooks/04_reinforcement_learning_example.ipynb b/examples/notebooks/04_reinforcement_learning_example.ipynb index d395bbae..353cf9fc 100644 --- a/examples/notebooks/04_reinforcement_learning_example.ipynb +++ b/examples/notebooks/04_reinforcement_learning_example.ipynb @@ -300,6 +300,7 @@ "source": [ "import logging\n", "import os\n", + "import yaml\n", "from datetime import datetime, timedelta\n", "from pathlib import Path\n", "\n", @@ -564,18 +565,18 @@ " # =============================================================================\n", " # 1.1 Get the Observations, which are the basis of the action decision\n", " # =============================================================================\n", + " \n", " # residual load forecast\n", - " # residual load forecast\n", - " scaling_factor_res_load = self.max_demand\n", + " scaling_factor_res_load = None #TODO\n", "\n", " # price forecast\n", - " scaling_factor_price = self.max_bid_price\n", + " scaling_factor_price = None #TODO\n", "\n", " # total capacity\n", - " scaling_factor_total_capacity = unit.max_power\n", + " scaling_factor_total_capacity = None #TODO\n", "\n", " # marginal cost\n", - " scaling_factor_marginal_cost = self.max_bid_price\n", + " scaling_factor_marginal_cost = None #TODO\n", "\n", " # checks if we are at the end of the simulation horizon, since we need to change the forecast then\n", " # for residual load and price forecast and scale them\n", @@ -775,7 +776,7 @@ " # =============================================================================\n", " # ==> YOUR CODE HERE\n", " base_bid = None # TODO\n", - " # add niose to the last dimension of the observation\n", + " # add noise to the last dimension of the observation\n", " # needs to be adjusted if observation space is changed, because only makes sense\n", " # if the last dimension of the observation space are the marginal cost\n", " curr_action = noise + base_bid.clone().detach()\n", @@ -1287,7 +1288,7 @@ "source": [ "learning_config = {\n", " \"continue_learning\": False,\n", - " \"trained_policies_save_path\": \"null\",\n", + " \"trained_policies_save_path\": None,\n", " \"max_bid_price\": 100,\n", " \"algorithm\": \"matd3\",\n", " \"learning_rate\": 0.001,\n", @@ -1305,6 +1306,26 @@ "}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bac01731", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)" + ] + }, { "cell_type": "markdown", "id": "132f9429", @@ -1662,6 +1683,866 @@ "lines_to_next_cell": 2 }, "outputs": [], + "source": [ + "# @title Complete notebook code with tasks already filled in\n", + "\n", + "# this cell is used to display the image in the notebook when using colab\n", + "# or running the notebook 
locally\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "import importlib.util\n",
+    "\n",
+    "# Check if 'google.colab' is available\n",
+    "IN_COLAB = importlib.util.find_spec(\"google.colab\") is not None\n",
+    "\n",
+    "if IN_COLAB:\n",
+    "    !pip install 'assume-framework[learning]'\n",
+    "    # Colab currently has issues with pyomo version 6.8.2, causing the notebook to crash\n",
+    "    # Installing an older version resolves this issue. This should only be considered a temporary fix.\n",
+    "    !pip install pyomo==6.8.0\n",
+    "    !git clone --depth=1 https://github.com/assume-framework/assume.git assume-repo\n",
+    "    !cd assume-repo && assume -s example_01b -db \"sqlite:///./examples/local_db/assume_db_example_01b.db\"\n",
+    "\n",
+    "colab_inputs_path = \"assume-repo/examples/inputs\"\n",
+    "local_inputs_path = \"../inputs\"\n",
+    "\n",
+    "inputs_path = colab_inputs_path if IN_COLAB else local_inputs_path\n",
+    "\n",
+    "import logging\n",
+    "import os\n",
+    "import yaml\n",
+    "from datetime import datetime, timedelta\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import torch as th\n",
+    "\n",
+    "from assume import World\n",
+    "from assume.common.base import LearningStrategy, SupportsMinMax\n",
+    "from assume.common.market_objects import MarketConfig, Orderbook, Product\n",
+    "from assume.reinforcement_learning.algorithms import actor_architecture_aliases\n",
+    "from assume.reinforcement_learning.learning_utils import NormalActionNoise\n",
+    "from assume.scenario.loader_csv import load_scenario_folder, run_learning\n",
+    "\n",
+    "class RLStrategy(LearningStrategy):\n",
+    "    \"\"\"\n",
+    "    Reinforcement Learning Strategy\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, *args, **kwargs):\n",
+    "        super().__init__(obs_dim=50, act_dim=2, unique_obs_dim=2, *args, **kwargs)\n",
+    "\n",
+    "        self.unit_id = kwargs[\"unit_id\"]\n",
+    "\n",
+    "        # defines bounds of actions space\n",
+    "        self.max_bid_price = kwargs.get(\"max_bid_price\", 100)\n",
+    "        self.max_demand = kwargs.get(\"max_demand\", 10e3)\n",
+    "\n",
+    "        # tells us whether we are training the agents or just executing pre-learned strategies\n",
+    "        self.learning_mode = kwargs.get(\"learning_mode\", False)\n",
+    "        self.perform_evaluation = kwargs.get(\"perform_evaluation\", False)\n",
+    "\n",
+    "        # based on learning config define algorithm configuration\n",
+    "        self.algorithm = kwargs.get(\"algorithm\", \"matd3\")\n",
+    "        actor_architecture = kwargs.get(\"actor_architecture\", \"mlp\")\n",
+    "\n",
+    "        # define the architecture of the actor neural network\n",
+    "        # if you use many time series inputs you might want to use the LSTM instead of the MLP for example\n",
+    "        if actor_architecture in actor_architecture_aliases.keys():\n",
+    "            self.actor_architecture_class = actor_architecture_aliases[\n",
+    "                actor_architecture\n",
+    "            ]\n",
+    "        else:\n",
+    "            raise ValueError(\n",
+    "                f\"Policy '{actor_architecture}' unknown. Supported architectures are {list(actor_architecture_aliases.keys())}\"\n",
+    "            )\n",
+    "\n",
+    "        # sets the device of the actor network\n",
+    "        device = kwargs.get(\"device\", \"cpu\")\n",
+    "        self.device = th.device(device if th.cuda.is_available() else \"cpu\")\n",
+    "        if not self.learning_mode:\n",
+    "            self.device = th.device(\"cpu\")\n",
+    "\n",
+    "        # future: add option to choose between float16 and float32\n",
+    "        # float_type = kwargs.get(\"float_type\", \"float32\")\n",
+    "        self.float_type = th.float\n",
+    "\n",
+    "        # for definition of observation space\n",
+    "        self.foresight = kwargs.get(\"foresight\", 24)\n",
+    "\n",
+    "        if self.learning_mode:\n",
+    "            self.learning_role = None\n",
+    "            self.collect_initial_experience_mode = kwargs.get(\n",
+    "                \"episodes_collecting_initial_experience\", True\n",
+    "            )\n",
+    "\n",
+    "            self.action_noise = NormalActionNoise(\n",
+    "                mu=0.0,\n",
+    "                sigma=kwargs.get(\"noise_sigma\", 0.1),\n",
+    "                action_dimension=self.act_dim,\n",
+    "                scale=kwargs.get(\"noise_scale\", 1.0),\n",
+    "                dt=kwargs.get(\"noise_dt\", 1.0),\n",
+    "            )\n",
+    "\n",
+    "        elif Path(kwargs[\"trained_policies_save_path\"]).is_dir():\n",
+    "            self.load_actor_params(load_path=kwargs[\"trained_policies_save_path\"])\n",
+    "\n",
+    "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
+    "# this is a workaround to have different methods of the class in different cells\n",
+    "# which is good for the purpose of this tutorial\n",
+    "# however, you should have all functions in a single class when using this example in .py files\n",
+    "\n",
+    "\n",
+    "class RLStrategy(RLStrategy):\n",
+    "    def calculate_bids(\n",
+    "        self,\n",
+    "        unit: SupportsMinMax,\n",
+    "        market_config: MarketConfig,\n",
+    "        product_tuples: list[Product],\n",
+    "        **kwargs,\n",
+    "    ) -> Orderbook:\n",
+    "        \"\"\"\n",
+    "        Calculate bids for a unit -> STEP 1 & 2\n",
+    "        \"\"\"\n",
+    "\n",
+    "        start = product_tuples[0][0]\n",
+    "        end = product_tuples[0][1]\n",
+    "        # get technical bounds for the unit output from the unit\n",
+    "        min_power, max_power = unit.calculate_min_max_power(start, end)\n",
+    "        min_power = min_power[start]\n",
+    "        max_power = max_power[start]\n",
+    "\n",
+    "        # =============================================================================\n",
+    "        # 1. Get the Observations, which are the basis of the action decision\n",
+    "        # =============================================================================\n",
+    "        next_observation = self.create_observation(\n",
+    "            unit=unit,\n",
+    "            market_id=market_config.market_id,\n",
+    "            start=start,\n",
+    "            end=end,\n",
+    "        )\n",
+    "\n",
+    "        # =============================================================================\n",
+    "        # 2. 
Get the Actions, based on the observations\n", + " # =============================================================================\n", + " actions, noise = self.get_actions(next_observation)\n", + "\n", + " bids = actions\n", + "\n", + " bids = self.remove_empty_bids(bids)\n", + "\n", + " return bids\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_reward(\n", + " self,\n", + " unit,\n", + " marketconfig: MarketConfig,\n", + " orderbook: Orderbook,\n", + " ):\n", + " \"\"\"\n", + " Calculate reward\n", + " \"\"\"\n", + "\n", + " return None\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def create_observation(\n", + " self,\n", + " unit: SupportsMinMax,\n", + " market_id: str,\n", + " start: datetime,\n", + " end: datetime,\n", + " ):\n", + " \"\"\"\n", + " Create observation\n", + " \"\"\"\n", + "\n", + " end_excl = end - unit.index.freq\n", + "\n", + " # get the forecast length depending on the time unit considered in the modelled unit\n", + " forecast_len = pd.Timedelta((self.foresight - 1) * unit.index.freq)\n", + "\n", + " # =============================================================================\n", + " # 1.1 Get the Observations, which are the basis of the action decision\n", + " # =============================================================================\n", + " \n", + " # residual load forecast\n", + " scaling_factor_res_load = self.max_demand\n", + "\n", + " # price forecast\n", + " scaling_factor_price = self.max_bid_price\n", + "\n", + " # total capacity\n", + " scaling_factor_total_capacity = unit.max_power\n", + "\n", + " # marginal cost\n", + " scaling_factor_marginal_cost = self.max_bid_price\n", + "\n", + " # checks if we are at the end of the simulation horizon, since we need to change the forecast then\n", + " # for residual load and price forecast and scale them\n", + " if (\n", + " end_excl + forecast_len\n", + " > unit.forecaster[f\"residual_load_{market_id}\"].index[-1]\n", + " ):\n", + " scaled_res_load_forecast = (\n", + " unit.forecaster[f\"residual_load_{market_id}\"].loc[start:].values\n", + " / scaling_factor_res_load\n", + " )\n", + " scaled_res_load_forecast = np.concatenate(\n", + " [\n", + " scaled_res_load_forecast,\n", + " unit.forecaster[f\"residual_load_{market_id}\"].iloc[\n", + " : self.foresight - len(scaled_res_load_forecast)\n", + " ],\n", + " ]\n", + " )\n", + "\n", + " else:\n", + " scaled_res_load_forecast = (\n", + " unit.forecaster[f\"residual_load_{market_id}\"]\n", + " .loc[start : end_excl + forecast_len]\n", + " .values\n", + " / scaling_factor_res_load\n", + " )\n", + "\n", + " if end_excl + forecast_len > unit.forecaster[f\"price_{market_id}\"].index[-1]:\n", + " scaled_price_forecast = (\n", + " 
unit.forecaster[f\"price_{market_id}\"].loc[start:].values\n", + " / scaling_factor_price\n", + " )\n", + " scaled_price_forecast = np.concatenate(\n", + " [\n", + " scaled_price_forecast,\n", + " unit.forecaster[f\"price_{market_id}\"].iloc[\n", + " : self.foresight - len(scaled_price_forecast)\n", + " ],\n", + " ]\n", + " )\n", + "\n", + " else:\n", + " scaled_price_forecast = (\n", + " unit.forecaster[f\"price_{market_id}\"]\n", + " .loc[start : end_excl + forecast_len]\n", + " .values\n", + " / scaling_factor_price\n", + " )\n", + "\n", + " # get last accepted bid volume and the current marginal costs of the unit\n", + " current_volume = unit.get_output_before(start)\n", + " current_costs = unit.calc_marginal_cost_with_partial_eff(current_volume, start)\n", + "\n", + " # scale unit outputs\n", + " scaled_total_capacity = current_volume / scaling_factor_total_capacity\n", + " scaled_marginal_cost = current_costs / scaling_factor_marginal_cost\n", + "\n", + " # concat all obsverations into one array\n", + " observation = np.concatenate(\n", + " [\n", + " scaled_res_load_forecast,\n", + " scaled_price_forecast,\n", + " np.array([scaled_total_capacity, scaled_marginal_cost]),\n", + " ]\n", + " )\n", + "\n", + " # transfer array to GPU for NN processing\n", + " observation = (\n", + " th.tensor(observation, dtype=self.float_type)\n", + " .to(self.device, non_blocking=True)\n", + " .view(-1)\n", + " )\n", + "\n", + " return observation.detach().clone()\n", + "\n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def get_actions(self, next_observation):\n", + " \"\"\"\n", + " Get actions\n", + " \"\"\"\n", + "\n", + " # distinction whether we are in learning mode or not to handle exploration realised with noise\n", + " if self.learning_mode:\n", + " # if we are in learning mode, the first x episodes we want to explore the entire action space\n", + " # to get a good initial experience in the area around the costs of the agent\n", + " if self.collect_initial_experience_mode:\n", + " # define current action as solely noise\n", + " noise = (\n", + " th.normal(\n", + " mean=0.0, std=0.2, size=(1, self.act_dim), dtype=self.float_type\n", + " )\n", + " .to(self.device)\n", + " .squeeze()\n", + " )\n", + "\n", + " # =============================================================================\n", + " # 2.1 Get Actions and handle exploration\n", + " # =============================================================================\n", + " # ==> YOUR CODE HERE\n", + " base_bid = next_observation[-1] # = marginal_costs\n", + " # add noise to the last dimension of the observation\n", + " # needs to be adjusted if observation space is changed, because only makes sense\n", + " # if the last dimension of the observation space are the marginal cost\n", + " curr_action = noise + base_bid.clone().detach()\n", + "\n", + " else:\n", + " # if we are not in the initial exploration phase we chose the action with the actor neuronal net\n", + " # and add noise to the action\n", + " curr_action = self.actor(next_observation).detach()\n", + " noise = th.tensor(\n", + " self.action_noise.noise(), device=self.device, dtype=self.float_type\n", + " 
)\n", + " curr_action += noise\n", + " else:\n", + " # if we are not in learning mode we just use the actor neuronal net to get the action without adding noise\n", + "\n", + " curr_action = self.actor(next_observation).detach()\n", + " noise = tuple(0 for _ in range(self.act_dim))\n", + "\n", + " curr_action = curr_action.clamp(-1, 1)\n", + "\n", + " return curr_action, noise\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_bids(\n", + " self,\n", + " unit: SupportsMinMax,\n", + " market_config: MarketConfig,\n", + " product_tuples: list[Product],\n", + " **kwargs,\n", + " ) -> Orderbook:\n", + " \"\"\"\n", + " Calculate bids for a unit\n", + " \"\"\"\n", + "\n", + " bid_quantity_inflex, bid_price_inflex = 0, 0\n", + " bid_quantity_flex, bid_price_flex = 0, 0\n", + "\n", + " start = product_tuples[0][0]\n", + " end = product_tuples[0][1]\n", + " # get technical bounds for the unit output from the unit\n", + " min_power, max_power = unit.calculate_min_max_power(start, end)\n", + " min_power = min_power[start]\n", + " max_power = max_power[start]\n", + "\n", + " # =============================================================================\n", + " # 1. Get the Observations, which are the basis of the action decision\n", + " # =============================================================================\n", + " next_observation = self.create_observation(\n", + " unit=unit,\n", + " market_id=market_config.market_id,\n", + " start=start,\n", + " end=end,\n", + " )\n", + "\n", + " # =============================================================================\n", + " # 2. 
Get the Actions, based on the observations\n", + " # =============================================================================\n", + " actions, noise = self.get_actions(next_observation)\n", + "\n", + " bids = actions\n", + "\n", + " # =============================================================================\n", + " # 3.2 Transform Actions into bids\n", + " # =============================================================================\n", + " # ==> YOUR CODE HERE\n", + " # actions are in the range [0,1], we need to transform them into actual bids\n", + " # we can use our domain knowledge to guide the bid formulation\n", + "\n", + " #calculate actual bids\n", + " #rescale actions to actual prices\n", + " bid_prices = actions * self.max_bid_price\n", + "\n", + " #calculate inflexible part of the bid\n", + " bid_quantity_inflex = min_power\n", + " bid_price_inflex = min(bid_prices)\n", + "\n", + " #calculate flexible part of the bid\n", + " bid_quantity_flex = max_power - bid_quantity_inflex\n", + " bid_price_flex = max(bid_prices)\n", + "\n", + " # actually formulate bids in orderbook format\n", + " bids = [\n", + " {\n", + " \"start_time\": start,\n", + " \"end_time\": end,\n", + " \"only_hours\": None,\n", + " \"price\": bid_price_inflex,\n", + " \"volume\": bid_quantity_inflex,\n", + " },\n", + " {\n", + " \"start_time\": start,\n", + " \"end_time\": end,\n", + " \"only_hours\": None,\n", + " \"price\": bid_price_flex,\n", + " \"volume\": bid_quantity_flex,\n", + " },\n", + " ]\n", + "\n", + " # store results in unit outputs as lists to be written to the buffer for learning\n", + " unit.outputs[\"rl_observations\"].append(next_observation)\n", + " unit.outputs[\"rl_actions\"].append(actions)\n", + "\n", + " # store results in unit outputs as series to be written to the database by the unit operator\n", + " unit.outputs[\"actions\"][start] = actions\n", + " unit.outputs[\"exploration_noise\"][start] = noise\n", + "\n", + " bids = self.remove_empty_bids(bids)\n", + "\n", + " return bids\n", + " \n", + "# we define the class again and inherit from the initial class just to add the additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def calculate_reward(\n", + " self,\n", + " unit,\n", + " marketconfig: MarketConfig,\n", + " orderbook: Orderbook,\n", + " ):\n", + " \"\"\"\n", + " Calculate reward\n", + " \"\"\"\n", + "\n", + " # =============================================================================\n", + " # 3. 
Calculate Reward\n", + " # =============================================================================\n", + " # function is called after the market is cleared and we get the market feedback,\n", + " # so we can calculate the profit\n", + "\n", + " product_type = marketconfig.product_type\n", + "\n", + " profit = 0\n", + " reward = 0\n", + " opportunity_cost = 0\n", + "\n", + " # iterate over all orders in the orderbook, to calculate order specific profit\n", + " for order in orderbook:\n", + " start = order[\"start_time\"]\n", + " end = order[\"end_time\"]\n", + " end_excl = end - unit.index.freq\n", + "\n", + " # depending on whether the unit calaculates marginal costs we take costs\n", + " if unit.marginal_cost is not None:\n", + " marginal_cost = (\n", + " unit.marginal_cost[start]\n", + " if len(unit.marginal_cost) > 1\n", + " else unit.marginal_cost\n", + " )\n", + " else:\n", + " marginal_cost = unit.calc_marginal_cost_with_partial_eff(\n", + " power_output=unit.outputs[product_type].loc[start:end_excl],\n", + " timestep=start,\n", + " )\n", + "\n", + " duration = (end - start) / timedelta(hours=1)\n", + "\n", + " # calculate profit as income - running_cost from this event\n", + " price_difference = order[\"accepted_price\"] - marginal_cost\n", + " order_profit = price_difference * order[\"accepted_volume\"] * duration\n", + "\n", + " # calculate opportunity cost\n", + " # as the loss of income we have because we are not running at full power\n", + " order_opportunity_cost = (\n", + " price_difference\n", + " * (\n", + " unit.max_power - unit.outputs[product_type].loc[start:end_excl]\n", + " ).sum()\n", + " * duration\n", + " )\n", + "\n", + " # if our opportunity costs are negative, we did not miss an opportunity to earn money and we set them to 0\n", + " order_opportunity_cost = max(order_opportunity_cost, 0)\n", + "\n", + " # collect profit and opportunity cost for all orders\n", + " opportunity_cost += order_opportunity_cost\n", + " profit += order_profit\n", + "\n", + " # consideration of start-up costs, which are evenly divided between the\n", + " # upward and downward regulation events\n", + " if (\n", + " unit.outputs[product_type].loc[start] != 0\n", + " and unit.outputs[product_type].loc[start - unit.index.freq] == 0\n", + " ):\n", + " profit = profit - unit.hot_start_cost / 2\n", + " elif (\n", + " unit.outputs[product_type].loc[start] == 0\n", + " and unit.outputs[product_type].loc[start - unit.index.freq] != 0\n", + " ):\n", + " profit = profit - unit.hot_start_cost / 2\n", + "\n", + " # =============================================================================\n", + " # =============================================================================\n", + " # ==> YOUR CODE HERE\n", + " # The straight forward implementation would be reward = profit, yet we would like to give the agent more guidance\n", + " # in the learning process, so we add a regret term to the reward, which is the opportunity cost\n", + " # define the reward and scale it\n", + "\n", + " scaling = 0.1 / unit.max_power\n", + " regret_scale = 0.2\n", + " reward = float(profit - regret_scale * opportunity_cost) * scaling\n", + "\n", + " # store results in unit outputs which are written to database by unit operator\n", + " unit.outputs[\"profit\"].loc[start:end_excl] += profit\n", + " unit.outputs[\"reward\"].loc[start:end_excl] = reward\n", + " unit.outputs[\"regret\"].loc[start:end_excl] = opportunity_cost\n", + "\n", + "# we define the class again and inherit from the initial class just to add the 
additional method to the original class\n", + "# this is a workaround to have different methods of the class in different cells\n", + "# which is good for the purpose of this tutorial\n", + "# however, you should have all functions in a single class when using this example in .py files\n", + "\n", + "\n", + "class RLStrategy(RLStrategy):\n", + " def load_actor_params(self, load_path):\n", + " \"\"\"\n", + " Load actor parameters\n", + " \"\"\"\n", + " directory = f\"{load_path}/actors/actor_{self.unit_id}.pt\"\n", + "\n", + " params = th.load(directory, map_location=self.device)\n", + "\n", + " self.actor = self.actor_architecture_class(\n", + " obs_dim=self.obs_dim,\n", + " act_dim=self.act_dim,\n", + " float_type=self.float_type,\n", + " unique_obs_dim=self.unique_obs_dim,\n", + " num_timeseries_obs_dim=self.num_timeseries_obs_dim,\n", + " ).to(self.device)\n", + "\n", + " self.actor.load_state_dict(params[\"actor\"])\n", + "\n", + " if self.learning_mode:\n", + " self.actor_target = self.actor_architecture_class(\n", + " obs_dim=self.obs_dim,\n", + " act_dim=self.act_dim,\n", + " float_type=self.float_type,\n", + " unique_obs_dim=self.unique_obs_dim,\n", + " num_timeseries_obs_dim=self.num_timeseries_obs_dim,\n", + " ).to(self.device)\n", + " self.actor_target.load_state_dict(params[\"actor_target\"])\n", + " self.actor_target.eval()\n", + " self.actor.optimizer.load_state_dict(params[\"actor_optimizer\"])\n", + "\n", + "learning_config = {\n", + " \"continue_learning\": False,\n", + " \"trained_policies_save_path\": None,\n", + " \"max_bid_price\": 100,\n", + " \"algorithm\": \"matd3\",\n", + " \"learning_rate\": 0.001,\n", + " \"training_episodes\": 2,\n", + " \"episodes_collecting_initial_experience\": 1,\n", + " \"train_freq\": \"24h\",\n", + " \"gradient_steps\": -1,\n", + " \"batch_size\": 256,\n", + " \"gamma\": 0.99,\n", + " \"device\": \"cpu\",\n", + " \"noise_sigma\": 0.1,\n", + " \"noise_scale\": 1,\n", + " \"noise_dt\": 1,\n", + " \"validation_episodes_interval\": 5,\n", + "}\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02a/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02b/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02b/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "# Read the YAML file\n", + "with open(f\"{inputs_path}/example_02c/config.yaml\") as file:\n", + " data = yaml.safe_load(file)\n", + "\n", + "# store our modifications to the config file\n", + "data[\"base\"][\"learning_mode\"] = True\n", + "data[\"base\"][\"learning_config\"] = learning_config\n", + "\n", + "# Write the modified data back to the file\n", + "with open(f\"{inputs_path}/example_02c/config.yaml\", \"w\") as file:\n", + " yaml.safe_dump(data, file)\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "csv_path = 
\"outputs\"\n",
+    "os.makedirs(\"local_db\", exist_ok=True)\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    db_uri = \"sqlite:///local_db/assume_db.db\"\n",
+    "\n",
+    "    scenario = \"example_02a\"\n",
+    "    study_case = \"base\"\n",
+    "\n",
+    "    # create world\n",
+    "    world = World(database_uri=db_uri, export_csv_path=csv_path)\n",
+    "\n",
+    "    # we import our defined bidding strategy class including the learning into the world bidding strategies\n",
+    "    # in the example files the learning bidding strategy in the input csv is named \"pp_learning\"\n",
+    "    # hence we define this strategy to be our learning class\n",
+    "    world.bidding_strategies[\"pp_learning\"] = RLStrategy\n",
+    "\n",
+    "    # then we load the scenario specified above from the respective input files\n",
+    "    load_scenario_folder(\n",
+    "        world,\n",
+    "        inputs_path=inputs_path,\n",
+    "        scenario=scenario,\n",
+    "        study_case=study_case,\n",
+    "    )\n",
+    "\n",
+    "    # run learning if learning mode is enabled\n",
+    "    # needed as we simulate the modelling horizon multiple times to train the reinforcement learning agents\n",
+    "\n",
+    "    if world.learning_config.get(\"learning_mode\", False):\n",
+    "        run_learning(\n",
+    "            world,\n",
+    "            inputs_path=inputs_path,\n",
+    "            scenario=scenario,\n",
+    "            study_case=study_case,\n",
+    "        )\n",
+    "\n",
+    "    # after the learning is done we make a normal run of the simulation, which equals a test run\n",
+    "    world.run()\n",
+    "\n",
+    "log = logging.getLogger(__name__)\n",
+    "\n",
+    "csv_path = \"outputs\"\n",
+    "os.makedirs(\"local_db\", exist_ok=True)\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    db_uri = \"sqlite:///local_db/assume_db.db\"\n",
+    "\n",
+    "    scenario = \"example_02b\"\n",
+    "    study_case = \"base\"\n",
+    "\n",
+    "    # create world\n",
+    "    world = World(database_uri=db_uri, export_csv_path=csv_path)\n",
+    "\n",
+    "    # we import our defined bidding strategy class including the learning into the world bidding strategies\n",
+    "    # in the example files the learning bidding strategy in the input csv is named \"pp_learning\"\n",
+    "    # hence we define this strategy to be our learning class\n",
+    "    world.bidding_strategies[\"pp_learning\"] = RLStrategy\n",
+    "\n",
+    "    # then we load the scenario specified above from the respective input files\n",
+    "    load_scenario_folder(\n",
+    "        world,\n",
+    "        inputs_path=inputs_path,\n",
+    "        scenario=scenario,\n",
+    "        study_case=study_case,\n",
+    "    )\n",
+    "\n",
+    "    # run learning if learning mode is enabled\n",
+    "    # needed as we simulate the modelling horizon multiple times to train the reinforcement learning agents\n",
+    "\n",
+    "    if world.learning_config.get(\"learning_mode\", False):\n",
+    "        run_learning(\n",
+    "            world,\n",
+    "            inputs_path=inputs_path,\n",
+    "            scenario=scenario,\n",
+    "            study_case=study_case,\n",
+    "        )\n",
+    "\n",
+    "    # after the learning is done we make a normal run of the simulation, which equals a test run\n",
+    "    world.run()\n",
+    "\n",
+    "log = logging.getLogger(__name__)\n",
+    "\n",
+    "csv_path = \"outputs\"\n",
+    "os.makedirs(\"local_db\", exist_ok=True)\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    db_uri = \"sqlite:///local_db/assume_db.db\"\n",
+    "\n",
+    "    scenario = \"example_02c\"\n",
+    "    study_case = \"base\"\n",
+    "\n",
+    "    # create world\n",
+    "    world = World(database_uri=db_uri, export_csv_path=csv_path)\n",
+    "\n",
+    "    # we import our defined bidding strategy class including the learning into the world bidding strategies\n",
+    "    # in the example files the learning bidding strategy in the input csv is named \"pp_learning\"\n",
+    "    # hence we define this strategy to be our learning class\n",
+    "    world.bidding_strategies[\"pp_learning\"] = RLStrategy\n",
+    "\n",
+    "    # then we load the scenario specified above from the respective input files\n",
+    "    load_scenario_folder(\n",
+    "        world,\n",
+    "        inputs_path=inputs_path,\n",
+    "        scenario=scenario,\n",
+    "        study_case=study_case,\n",
+    "    )\n",
+    "\n",
+    "    # run learning if learning mode is enabled\n",
+    "    # needed as we simulate the modelling horizon multiple times to train the reinforcement learning agents\n",
+    "\n",
+    "    if world.learning_config.get(\"learning_mode\", False):\n",
+    "        run_learning(\n",
+    "            world,\n",
+    "            inputs_path=inputs_path,\n",
+    "            scenario=scenario,\n",
+    "            study_case=study_case,\n",
+    "        )\n",
+    "\n",
+    "    # after the learning is done we make a normal run of the simulation, which equals a test run\n",
+    "    world.run()\n",
+    "\n",
+    "!pip install matplotlib\n",
+    "\n",
+    "import os\n",
+    "from functools import partial\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sqlalchemy import create_engine\n",
+    "\n",
+    "os.makedirs(\"outputs\", exist_ok=True)\n",
+    "\n",
+    "db_uri = \"sqlite:///local_db/assume_db.db\"\n",
+    "\n",
+    "engine = create_engine(db_uri)\n",
+    "\n",
+    "\n",
+    "sql = \"\"\"\n",
+    "SELECT ident, simulation,\n",
+    "sum(round(CAST(value AS numeric), 2)) FILTER (WHERE variable = 'total_cost') as total_cost,\n",
+    "sum(round(CAST(value AS numeric), 2)*1000) FILTER (WHERE variable = 'total_volume') as total_volume,\n",
+    "sum(round(CAST(value AS numeric), 2)) FILTER (WHERE variable = 'avg_price') as average_cost\n",
+    "FROM kpis\n",
+    "where variable in ('total_cost', 'total_volume', 'avg_price')\n",
+    "and simulation in ('example_02a_base', 'example_02b_base', 'example_02c_base')\n",
+    "group by simulation, ident ORDER BY simulation\n",
+    "\"\"\"\n",
+    "\n",
+    "\n",
+    "kpis = pd.read_sql(sql, engine)\n",
+    "\n",
+    "# sort the dataframe to have sho, bo and lo case in the right order\n",
+    "\n",
+    "# sort kpis in the order sho, bo, lo\n",
+    "\n",
+    "kpis = kpis.sort_values(\n",
+    "    by=\"simulation\",\n",
+    "    # key=lambda x: x.map({\"example_02a\": 1, \"example_02b\": 2, \"example_02c\": 3}),\n",
+    ")\n",
+    "\n",
+    "\n",
+    "kpis[\"total_volume\"] /= 1e9\n",
+    "kpis[\"total_cost\"] /= 1e6\n",
+    "savefig = partial(plt.savefig, transparent=False, bbox_inches=\"tight\")\n",
+    "\n",
+    "xticks = kpis[\"simulation\"].unique()\n",
+    "plt.style.use(\"seaborn-v0_8\")\n",
+    "\n",
+    "fig, ax = plt.subplots(1, 1, figsize=(10, 6))\n",
+    "\n",
+    "ax2 = ax.twinx()  # Create another axes that shares the same x-axis as ax.\n",
+    "\n",
+    "width = 0.4\n",
+    "\n",
+    "kpis.total_volume.plot(kind=\"bar\", ax=ax, width=width, position=1, color=\"royalblue\")\n",
+    "kpis.total_cost.plot(kind=\"bar\", ax=ax2, width=width, position=0, color=\"green\")\n",
+    "\n",
+    "# set x-axis limits\n",
+    "ax.set_xlim(-0.6, len(kpis[\"simulation\"]) - 0.4)\n",
+    "\n",
+    "# set y-axis limits\n",
+    "ax.set_ylim(0, max(kpis.total_volume) * 1.1 + 0.1)\n",
+    "ax2.set_ylim(0, max(kpis.total_cost) * 1.1 + 0.1)\n",
+    "\n",
+    "ax.set_ylabel(\"Total Volume (GWh)\")\n",
+    "ax2.set_ylabel(\"Total Cost (M€)\")\n",
+    
"ax.set_xticklabels(xticks, rotation=45)\n", + "ax.set_xlabel(\"Simulation\")\n", + "\n", + "ax.legend([\"Total Volume\"], loc=\"upper left\")\n", + "ax2.legend([\"Total Cost\"], loc=\"upper right\")\n", + "\n", + "plt.title(\"Total Volume and Total Cost for each Simulation\")\n", + "\n", + "sql = \"\"\"\n", + "SELECT\n", + " product_start AS \"time\",\n", + " price AS \"Price\",\n", + " simulation AS \"simulation\",\n", + " node\n", + "FROM market_meta\n", + "WHERE simulation in ('example_02a_base', 'example_02b_base', 'example_02c_base') AND market_id in ('EOM') \n", + "GROUP BY market_id, simulation, product_start, price, node\n", + "ORDER BY product_start, node\n", + "\n", + "\"\"\"\n", + "\n", + "df = pd.read_sql(sql, engine)\n", + "\n", + "df\n", + "\n", + "# Convert the 'time' column to datetime\n", + "df[\"time\"] = pd.to_datetime(df[\"time\"])\n", + "\n", + "# Plot the data\n", + "plt.figure(figsize=(14, 7))\n", + "# Loop through each simulation and plot\n", + "for simulation in df[\"simulation\"].unique():\n", + " subset = df[df[\"simulation\"] == simulation]\n", + " plt.plot(subset[\"time\"], subset[\"Price\"], label=simulation)\n", + "\n", + "plt.title(\"Price over Time for Different Simulations\")\n", + "plt.xlabel(\"Time\")\n", + "plt.ylabel(\"Price\")\n", + "plt.legend(title=\"Simulation\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97f4d181", + "metadata": {}, + "outputs": [], "source": [] } ],