diff --git a/assume/common/market_objects.py b/assume/common/market_objects.py
index 433a3cf3..8a5e9afe 100644
--- a/assume/common/market_objects.py
+++ b/assume/common/market_objects.py
@@ -141,7 +141,7 @@ class MarketConfig:
     supports_get_unmatched: bool = False
     eligible_obligations_lambda: eligible_lambda = lambda x: True
     param_dict: dict = field(default_factory=dict)
-    addr: AgentAddress| None = None
+    addr: AgentAddress | None = None
 
 
 class OpeningMessage(TypedDict):
diff --git a/assume/common/units_operator.py b/assume/common/units_operator.py
index 21f9aafe..6bf343fa 100644
--- a/assume/common/units_operator.py
+++ b/assume/common/units_operator.py
@@ -185,7 +185,11 @@ def handle_opening(self, opening: OpeningMessage, meta: MetaDict) -> None:
             meta (MetaDict): The meta data of the market.
         """
         logger.debug(
-            '%s received opening from: %s %s until: %s.', self.id, opening["market_id"], opening["start_time"], opening["end_time"]
+            "%s received opening from: %s %s until: %s.",
+            self.id,
+            opening["market_id"],
+            opening["start_time"],
+            opening["end_time"],
         )
         self.context.schedule_instant_task(coroutine=self.submit_bids(opening, meta))
 
diff --git a/assume/markets/clearing_algorithms/nodal_pricing.py b/assume/markets/clearing_algorithms/nodal_pricing.py
index dc910774..34eb69d1 100644
--- a/assume/markets/clearing_algorithms/nodal_pricing.py
+++ b/assume/markets/clearing_algorithms/nodal_pricing.py
@@ -157,6 +157,9 @@ def clear(
         status, termination_condition = nodal_network.optimize(
             solver_name=self.solver,
             solver_options=self.solver_options,
+            # do not show tqdm progress bars for large grids
+            # https://github.com/PyPSA/linopy/pull/375
+            progress=False,
         )
 
         if status != "ok":
diff --git a/assume/markets/clearing_algorithms/redispatch.py b/assume/markets/clearing_algorithms/redispatch.py
index 42f3617c..7d096822 100644
--- a/assume/markets/clearing_algorithms/redispatch.py
+++ b/assume/markets/clearing_algorithms/redispatch.py
@@ -193,6 +193,9 @@ def clear(
         status, termination_condition = redispatch_network.optimize(
             solver_name=self.solver,
             solver_options=self.solver_options,
+            # do not show tqdm progress bars for large grids
+            # https://github.com/PyPSA/linopy/pull/375
+            progress=False,
         )
 
         if status != "ok":
diff --git a/docs/source/learning.rst b/docs/source/learning.rst
index 38070aec..c1b59ef2 100644
--- a/docs/source/learning.rst
+++ b/docs/source/learning.rst
@@ -121,7 +121,7 @@ The actor policy of each agent is updated using the deterministic policy gradien
     ∇_a Q_i,θ_j(S_k, a_1,k, ..., a_N,k, π(o_i,k))|a_i,k=π(o_i,k) * ∇_θ π(o_i,k)
 
 The actor is updated similarly using only one critic network :math:`Q_{θ1}`. These changes to the original DDPG algorithm allow increased stability and convergence of the TD3 algorithm. This is especially relevant when approaching a multi-agent RL setup, as discussed in the foregoing section.
-Please note that the actor and critics are updated by sampling experience from the buffer where all intercations of the agents are stored, namely the observations, actions and rewards. There are more complex buffers possible, like those that use importance sampling, but the default buffer is a simple replay buffer. You can find a documentation of the latter in :doc:`buffers`
+Please note that the actor and critics are updated by sampling experience from the buffer where all interactions of the agents are stored, namely the observations, actions and rewards. There are more complex buffers possible, like those that use importance sampling, but the default buffer is a simple replay buffer. You can find a documentation of the latter in :doc:`buffers`
 
 
 The Learning Implementation in ASSUME
diff --git a/examples/notebooks/04_reinforcement_learning_example.ipynb b/examples/notebooks/04_reinforcement_learning_example.ipynb
index 2b213b3e..d85f4b33 100644
--- a/examples/notebooks/04_reinforcement_learning_example.ipynb
+++ b/examples/notebooks/04_reinforcement_learning_example.ipynb
@@ -300,13 +300,13 @@
    "source": [
     "import logging\n",
     "import os\n",
-    "import yaml\n",
     "from datetime import datetime, timedelta\n",
     "from pathlib import Path\n",
     "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "import torch as th\n",
+    "import yaml\n",
     "\n",
     "from assume import World\n",
     "from assume.common.base import LearningStrategy, SupportsMinMax\n",
@@ -565,18 +565,18 @@
     "        # =============================================================================\n",
     "        # 1.1 Get the Observations, which are the basis of the action decision\n",
     "        # =============================================================================\n",
-    "        \n",
+    "\n",
     "        # residual load forecast\n",
-    "        scaling_factor_res_load = None #TODO\n",
+    "        scaling_factor_res_load = None  # TODO\n",
     "\n",
     "        # price forecast\n",
-    "        scaling_factor_price = None #TODO\n",
+    "        scaling_factor_price = None  # TODO\n",
     "\n",
     "        # total capacity\n",
-    "        scaling_factor_total_capacity = None #TODO\n",
+    "        scaling_factor_total_capacity = None  # TODO\n",
     "\n",
     "        # marginal cost\n",
-    "        scaling_factor_marginal_cost = None #TODO\n",
+    "        scaling_factor_marginal_cost = None  # TODO\n",
     "\n",
     "        # checks if we are at the end of the simulation horizon, since we need to change the forecast then\n",
     "        # for residual load and price forecast and scale them\n",
@@ -1689,9 +1689,8 @@
     "# this cell is used to display the image in the notebook when using colab\n",
     "# or running the notebook locally\n",
     "\n",
-    "import os\n",
-    "\n",
     "import importlib.util\n",
+    "import os\n",
     "\n",
     "# Check if 'google.colab' is available\n",
     "IN_COLAB = importlib.util.find_spec(\"google.colab\") is not None\n",
@@ -1707,17 +1706,17 @@
     "colab_inputs_path = \"assume-repo/examples/inputs\"\n",
     "local_inputs_path = \"../inputs\"\n",
     "\n",
-    "inputs_path = colab_inputs_path if IN_COLAB else local_inputs_path \n",
+    "inputs_path = colab_inputs_path if IN_COLAB else local_inputs_path\n",
     "\n",
     "import logging\n",
     "import os\n",
-    "import yaml\n",
     "from datetime import datetime, timedelta\n",
     "from pathlib import Path\n",
     "\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "import torch as th\n",
+    "import yaml\n",
     "\n",
     "from assume import World\n",
     "from assume.common.base import LearningStrategy, SupportsMinMax\n",
@@ -1726,6 +1725,7 @@
     "from assume.reinforcement_learning.learning_utils import NormalActionNoise\n",
     "from assume.scenario.loader_csv import load_scenario_folder, run_learning\n",
     "\n",
+    "\n",
     "class RLStrategy(LearningStrategy):\n",
     "    \"\"\"\n",
     "    Reinforcement Learning Strategy\n",
@@ -1789,6 +1789,7 @@
     "        elif Path(load_path=kwargs[\"trained_policies_save_path\"]).is_dir():\n",
     "            self.load_actor_params(load_path=kwargs[\"trained_policies_save_path\"])\n",
     "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -1834,7 +1835,8 @@
     "        bids = self.remove_empty_bids(bids)\n",
     "\n",
     "        return bids\n",
-    "        \n",
+    "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -1853,12 +1855,14 @@
     "        \"\"\"\n",
     "\n",
     "        return None\n",
-    "        \n",
+    "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
     "# however, you should have all functions in a single class when using this example in .py files\n",
     "\n",
+    "\n",
     "class RLStrategy(RLStrategy):\n",
     "    def create_observation(\n",
     "        self,\n",
@@ -1879,7 +1883,7 @@
     "        # =============================================================================\n",
     "        # 1.1 Get the Observations, which are the basis of the action decision\n",
     "        # =============================================================================\n",
-    "        \n",
+    "\n",
     "        # residual load forecast\n",
     "        scaling_factor_res_load = self.max_demand\n",
     "\n",
@@ -1967,6 +1971,7 @@
     "\n",
     "        return observation.detach().clone()\n",
     "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -1997,7 +2002,7 @@
     "        # 2.1 Get Actions and handle exploration\n",
     "        # =============================================================================\n",
     "        # ==> YOUR CODE HERE\n",
-    "        base_bid = next_observation[-1] # = marginal_costs\n",
+    "        base_bid = next_observation[-1]  # = marginal_costs\n",
     "        # add noise to the last dimension of the observation\n",
     "        # needs to be adjusted if observation space is changed, because only makes sense\n",
     "        # if the last dimension of the observation space are the marginal cost\n",
@@ -2020,7 +2025,8 @@
     "        curr_action = curr_action.clamp(-1, 1)\n",
     "\n",
     "        return curr_action, noise\n",
-    "        \n",
+    "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -2073,15 +2079,15 @@
     "        # actions are in the range [0,1], we need to transform them into actual bids\n",
     "        # we can use our domain knowledge to guide the bid formulation\n",
     "\n",
-    "        #calculate actual bids\n",
-    "        #rescale actions to actual prices\n",
+    "        # calculate actual bids\n",
+    "        # rescale actions to actual prices\n",
     "        bid_prices = actions * self.max_bid_price\n",
     "\n",
-    "        #calculate inflexible part of the bid\n",
+    "        # calculate inflexible part of the bid\n",
     "        bid_quantity_inflex = min_power\n",
     "        bid_price_inflex = min(bid_prices)\n",
     "\n",
-    "        #calculate flexible part of the bid\n",
+    "        # calculate flexible part of the bid\n",
     "        bid_quantity_flex = max_power - bid_quantity_inflex\n",
     "        bid_price_flex = max(bid_prices)\n",
     "\n",
@@ -2114,7 +2120,8 @@
     "        bids = self.remove_empty_bids(bids)\n",
     "\n",
     "        return bids\n",
-    "        \n",
+    "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -2215,6 +2222,7 @@
     "        unit.outputs[\"reward\"].loc[start:end_excl] = reward\n",
     "        unit.outputs[\"regret\"].loc[start:end_excl] = opportunity_cost\n",
     "\n",
+    "\n",
     "# we define the class again and inherit from the initial class just to add the additional method to the original class\n",
     "# this is a workaround to have different methods of the class in different cells\n",
     "# which is good for the purpose of this tutorial\n",
@@ -2252,6 +2260,7 @@
     "        self.actor_target.eval()\n",
     "        self.actor.optimizer.load_state_dict(params[\"actor_optimizer\"])\n",
     "\n",
+    "\n",
     "learning_config = {\n",
     "    \"continue_learning\": False,\n",
     "    \"trained_policies_save_path\": None,\n",
@@ -2534,7 +2543,7 @@
     "plt.xlabel(\"Time\")\n",
     "plt.ylabel(\"Price\")\n",
     "plt.legend(title=\"Simulation\")\n",
-    "plt.show()\n"
+    "plt.show()"
    ]
   },
   {
diff --git a/pyproject.toml b/pyproject.toml
index bb18c2aa..28e7ecf1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -101,6 +101,7 @@ ignore = ["E501", "G004", "E731"]
     "F841", # allow unused local variables
 ]
 "examples/notebooks/*" = [
+    "E402", # allow imports in the middle of notebooks
     "E999", # allow no expressions
     "F841", # allow unused local variables
     "F811", # allow import redeclaration
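Background note: the docs/source/learning.rst paragraph above describes a simple replay buffer from which the agents' stored interactions (observations, actions and rewards) are sampled uniformly to update the actor and critic networks. The following is a minimal sketch of such a buffer for orientation only; the class and attribute names are illustrative assumptions and do not mirror ASSUME's actual buffer implementation documented in :doc:`buffers`.

import numpy as np


class SimpleReplayBuffer:
    """Illustrative ring buffer storing (observation, action, reward) transitions."""

    def __init__(self, size: int, obs_dim: int, act_dim: int):
        self.size = size
        self.pos = 0
        self.full = False
        self.observations = np.zeros((size, obs_dim), dtype=np.float32)
        self.actions = np.zeros((size, act_dim), dtype=np.float32)
        self.rewards = np.zeros((size, 1), dtype=np.float32)

    def add(self, obs, action, reward):
        # overwrite the oldest transition once the buffer is full
        self.observations[self.pos] = obs
        self.actions[self.pos] = action
        self.rewards[self.pos] = reward
        self.pos = (self.pos + 1) % self.size
        if self.pos == 0:
            self.full = True

    def sample(self, batch_size: int):
        # uniform sampling over the stored transitions (no importance sampling)
        upper = self.size if self.full else self.pos
        idx = np.random.randint(0, upper, size=batch_size)
        return self.observations[idx], self.actions[idx], self.rewards[idx]

In such a setup, a learning strategy would call add() after every market step and draw a mini-batch with sample() before each gradient update of the actor and critics.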