Fix learning on cuda #201

Merged: 10 commits, Sep 25, 2023

Changes from all commits
5 changes: 3 additions & 2 deletions .gitignore
@@ -133,8 +133,9 @@ dmypy.json
.idea
.vscode
examples/inputs/learned_strategies
workshop/

examples/outputs
examples/local_db/
validation_runs
assume-db
forecasts_df.csv
forecasts_df.csv
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -12,4 +12,4 @@ build:
python: mambaforge-4.10

conda:
environment: environment_docs.yml
environment: environment_docs.yaml
4 changes: 2 additions & 2 deletions README.md
@@ -1,6 +1,6 @@
# ASSUME: Agent-Based Electricity Markets Simulation Toolbox

![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yml/badge.svg)
![Lint Status](https://github.com/assume-framework/assume/actions/workflows/lint-pytest.yaml/badge.svg)
[![Code Coverage](https://codecov.io/gh/assume-framework/assume/branch/main/graph/badge.svg?token=CZ4FO7P57H)](https://codecov.io/gh/assume-framework/assume)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8088760.svg)](https://doi.org/10.5281/zenodo.8088760)

@@ -114,7 +114,7 @@ pre-commit run --all-files
First, create an environment that includes the documentation dependencies:

```bash
conda env create -f environment_docs.yml
conda env create -f environment_docs.yaml
```

To generate or update the automatically created docs in `docs/source/assume*`, run:
6 changes: 4 additions & 2 deletions assume/common/outputs.py
@@ -41,7 +41,7 @@ def __init__(
end: datetime,
db_engine=None,
export_csv_path: str = "",
save_frequency_hours: int = 24,
save_frequency_hours: int = None,
learning_mode: bool = False,
):
super().__init__()
@@ -202,6 +202,7 @@ async def store_dfs(self):
for table in self.write_dfs.keys():
if len(self.write_dfs[table]) == 0:
continue

df = pd.concat(self.write_dfs[table], axis=0)
df.reset_index()
if df.empty:
@@ -318,7 +319,8 @@ def write_market_dispatch(self, data):
"""
df = pd.DataFrame(data, columns=["datetime", "power", "market_id", "unit_id"])
df["simulation"] = self.simulation_id
self.write_dfs["market_dispatch"].append(df)
if not df.empty:
self.write_dfs["market_dispatch"].append(df)

def write_unit_dispatch(self, data):
"""
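The two hunks above add empty-frame guards at both ends of the output pipeline: `store_dfs` skips tables with nothing queued, and `write_market_dispatch` no longer appends empty DataFrames to the queue. A minimal, self-contained sketch of that pattern; the helper names `queue_market_dispatch` and `flush` are illustrative and not part of the codebase:

```python
from collections import defaultdict

import pandas as pd

write_dfs = defaultdict(list)  # table name -> list of queued DataFrames, as in WriteOutput


def queue_market_dispatch(data, simulation_id):
    """Queue dispatch rows for a later bulk write, skipping empty batches."""
    df = pd.DataFrame(data, columns=["datetime", "power", "market_id", "unit_id"])
    df["simulation"] = simulation_id
    if not df.empty:  # the guard added in this PR: never queue empty frames
        write_dfs["market_dispatch"].append(df)


def flush():
    """Concatenate queued frames per table; tables with nothing queued are skipped."""
    for table, frames in write_dfs.items():
        if not frames:
            continue
        df = pd.concat(frames, axis=0)
        if df.empty:
            continue
        # the real role would write df to the database or a CSV export here
        print(f"writing {len(df)} rows to {table}")


queue_market_dispatch([], "example_run")  # nothing is queued
queue_market_dispatch([["2023-09-25 00:00", 100.0, "EOM", "unit_1"]], "example_run")
flush()  # prints: writing 1 rows to market_dispatch
```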
9 changes: 6 additions & 3 deletions assume/common/scenario_loader.py
@@ -248,7 +248,7 @@ async def load_scenario_folder_async(

# load the config file
path = f"{inputs_path}/{scenario}"
with open(f"{path}/config.yml", "r") as f:
with open(f"{path}/config.yaml", "r") as f:
config = yaml.safe_load(f)
if not study_case:
study_case = list(config.keys())[0]
@@ -559,6 +559,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
act_dim=world.learning_role.act_dim,
n_rl_units=len(world.learning_role.rl_strats),
device=world.learning_role.device,
float_type=world.learning_role.float_type,
)
actors_and_critics = None
world.output_role.del_similar_runs()
@@ -584,7 +585,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.buffer = buffer
world.learning_role.episodes_done = episode

if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
if episode + 1 > world.learning_role.episodes_collecting_initial_experience:
world.learning_role.turn_off_initial_exploration()

world.run()
@@ -593,7 +594,9 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
world.learning_role.training_episodes,
world.learning_config.get("validation_episodes_interval", 5),
)
if (episode + 1) % validation_interval == 0:
if (episode + 1) % validation_interval == 0 and (
episode + 1
) > world.learning_role.episodes_collecting_initial_experience:
old_path = world.learning_config["load_learned_path"]
new_path = f"{old_path}_eval"
# save validation params in validation path
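The hunks above tighten the episode schedule in `run_learning`: initial exploration is switched off only once strictly more than `episodes_collecting_initial_experience` episodes have been started, and validation runs are additionally skipped while that initial experience is still being collected. The generator below is an illustrative reconstruction of those two conditions, not the loader's actual control flow:

```python
def schedule(
    training_episodes: int,
    episodes_collecting_initial_experience: int,
    validation_episodes_interval: int = 5,
):
    """Yield (episode, explore, validate) flags following the corrected conditions."""
    validation_interval = min(training_episodes, validation_episodes_interval)
    for episode in range(training_episodes):
        # exploration stays on until the configured number of exploration episodes is done
        explore = not (episode + 1 > episodes_collecting_initial_experience)
        # validate on the interval, but never during the initial-experience phase
        validate = (episode + 1) % validation_interval == 0 and (
            episode + 1
        ) > episodes_collecting_initial_experience
        yield episode, explore, validate


# e.g. 6 training episodes, 2 exploration episodes, validation every 2 episodes
for episode, explore, validate in schedule(6, 2, 2):
    print(episode, explore, validate)
```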
38 changes: 24 additions & 14 deletions assume/common/units_operator.py
@@ -411,7 +411,9 @@ def write_to_learning(
self,
start: datetime,
marketconfig: MarketConfig,
action_dimension: int,
obs_dim: int,
act_dim: int,
device: str,
learning_unit_count: int,
):
learning_role_id = "learning_agent"
@@ -422,27 +424,28 @@
try:
import torch as th

all_actions = th.zeros(
(learning_unit_count, action_dimension), device="cpu"
)
except ImportError:
logger.error("tried writing learning_params, but torch is not installed")
all_actions = np.zeros((learning_unit_count, action_dimension))
all_actions = np.zeros((learning_unit_count, act_dim))
return

all_observations = th.zeros((learning_unit_count, obs_dim), device=device)
all_actions = th.zeros((learning_unit_count, act_dim), device=device)

i = 0
for unit_id, unit in self.units.items():
for unit in self.units.values():
# rl only for energy market for now!
if isinstance(
unit.bidding_strategies.get(marketconfig.product_type),
LearningStrategy,
):
all_observations.append(
np.array(unit.outputs["rl_observations"][start])
)
all_observations[i, :] = unit.outputs["rl_observations"][start]
all_actions[i, :] = unit.outputs["rl_actions"][start]
all_rewards.append(unit.outputs["reward"][start])
i += 1

# convert all_actions list of tensor to numpy 2D array
all_observations = all_observations.squeeze().cpu().numpy()
all_actions = all_actions.squeeze().cpu().numpy()
all_rewards = np.array(all_rewards)
rl_agent_data = (np.array(all_observations), all_actions, all_rewards)
@@ -467,16 +470,18 @@ def write_learning_params(self, orderbook: Orderbook, marketconfig: MarketConfig
:type marketconfig: MarketConfig
"""
learning_strategies = []
action_dimension = 0

for unit in self.units.values():
bidding_strategy = unit.bidding_strategies.get(marketconfig.product_type)
if isinstance(bidding_strategy, LearningStrategy):
learning_strategies.append(bidding_strategy)
# should be the same across all strategies
action_dimension = bidding_strategy.act_dim
obs_dim = bidding_strategy.obs_dim
act_dim = bidding_strategy.act_dim
device = bidding_strategy.device

# should write learning results if at least one bidding_strategy is a learning strategy
write_learning_results = len(learning_strategies) > 0 and orderbook
if write_learning_results:
if learning_strategies and orderbook:
start = orderbook[0]["start_time"]
# write learning output
self.write_learning_to_output(start, marketconfig)
@@ -486,5 +491,10 @@ def write_learning_params(self, orderbook: Orderbook, marketconfig: MarketConfig
if learning_strategies[0].learning_mode:
# in learning mode we are sending data to learning
self.write_to_learning(
start, marketconfig, action_dimension, len(learning_strategies)
start=start,
marketconfig=marketconfig,
obs_dim=obs_dim,
act_dim=act_dim,
device=device,
learning_unit_count=len(learning_strategies),
)
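The rewritten `write_to_learning` above pre-allocates observation and action tensors of shape `(learning_unit_count, dim)` on the learning device and copies them to NumPy once at the end, rather than appending per-unit arrays on the CPU. A rough standalone sketch of that collection pattern; `collect_rl_data` and the sample data are invented for illustration:

```python
import numpy as np
import torch as th


def collect_rl_data(unit_outputs, obs_dim, act_dim, device="cpu"):
    """Gather per-unit observations, actions and rewards into pre-allocated tensors."""
    n_units = len(unit_outputs)
    all_obs = th.zeros((n_units, obs_dim), device=device)
    all_act = th.zeros((n_units, act_dim), device=device)
    all_rewards = []
    for i, out in enumerate(unit_outputs):
        all_obs[i, :] = th.as_tensor(out["observation"], dtype=all_obs.dtype)
        all_act[i, :] = th.as_tensor(out["action"], dtype=all_act.dtype)
        all_rewards.append(out["reward"])
    # a single device-to-host transfer at the end, as in the diff
    return (
        all_obs.squeeze().cpu().numpy(),
        all_act.squeeze().cpu().numpy(),
        np.array(all_rewards),
    )


# hypothetical data for two learning units with obs_dim=50 and act_dim=2
outputs = [
    {"observation": np.ones(50), "action": np.zeros(2), "reward": 0.1},
    {"observation": np.zeros(50), "action": np.ones(2), "reward": 0.3},
]
obs, act, rew = collect_rl_data(outputs, obs_dim=50, act_dim=2)
print(obs.shape, act.shape, rew.shape)  # (2, 50) (2, 2) (2,)
```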
2 changes: 1 addition & 1 deletion assume/reinforcement_learning/algorithms/base_algorithm.py
@@ -69,6 +69,6 @@ def __init__(
self.unique_obs_len = 8

def update_policy(self):
self.logger.error(
logger.error(
"No policy update function of the used Rl algorithm was defined. Please define how the policies should be updated in the specific algorithm you use"
)
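The one-line change above switches from an instance attribute to the module-level `logger`, the usual `logging.getLogger(__name__)` pattern. A minimal sketch of that pattern, with names chosen for illustration:

```python
import logging

logger = logging.getLogger(__name__)  # module-level logger; no self.logger attribute needed


class RLAlgorithmSketch:
    def update_policy(self):
        # subclasses are expected to override this; the base class only reports misuse
        logger.error(
            "No policy update function of the used RL algorithm was defined. "
            "Please define how the policies should be updated in the specific algorithm you use."
        )
```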
7 changes: 5 additions & 2 deletions assume/reinforcement_learning/buffer.py
@@ -26,6 +26,7 @@ def __init__(
act_dim: int,
n_rl_units: int,
device: str,
float_type,
):
self.buffer_size = buffer_size
self.obs_dim = obs_dim
@@ -35,8 +36,10 @@ def __init__(
self.full = False

self.device = device
self.np_float_type = np.float16 if self.device.type == "cuda" else np.float32
self.th_float_type = th.half if self.device.type == "cuda" else th.float

# future: use float16 for GPU
self.np_float_type = np.float16 if float_type == th.float16 else np.float32
self.th_float_type = float_type

self.n_rl_units = n_rl_units

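With this hunk the buffer's NumPy and torch dtypes follow the `float_type` passed in from the learning role instead of being inferred from the device (previously a CUDA device silently switched the buffer to float16). A reduced sketch of the constructor logic; the class name and the storage arrays shown are illustrative, and the real `Buffer` holds more state:

```python
import numpy as np
import torch as th


class ReplayBufferSketch:
    """Illustrative buffer skeleton mirroring the dtype handling from the diff."""

    def __init__(self, buffer_size, obs_dim, act_dim, n_rl_units, device, float_type):
        self.device = device
        # dtype is driven by the explicit float_type argument, not by device.type
        self.np_float_type = np.float16 if float_type == th.float16 else np.float32
        self.th_float_type = float_type
        self.observations = np.zeros(
            (buffer_size, n_rl_units, obs_dim), dtype=self.np_float_type
        )
        self.actions = np.zeros(
            (buffer_size, n_rl_units, act_dim), dtype=self.np_float_type
        )


# run_learning constructs the buffer with the learning role's settings, roughly like:
buffer = ReplayBufferSketch(
    buffer_size=1000,
    obs_dim=50,
    act_dim=2,
    n_rl_units=3,
    device=th.device("cpu"),
    float_type=th.float,
)
print(buffer.observations.dtype)  # float32 unless float16 is requested explicitly
```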
15 changes: 10 additions & 5 deletions assume/reinforcement_learning/learning_role.py
@@ -61,8 +61,13 @@ def __init__(
else "cpu"
)
self.device = th.device(cuda_device if th.cuda.is_available() else "cpu")
self.float_type = th.float16 if "cuda" in cuda_device else th.float

# future: add option to choose between float16 and float32
# float_type = learning_config.get("float_type", "float32")
self.float_type = th.float

th.backends.cuda.matmul.allow_tf32 = True
th.backends.cudnn.allow_tf32 = True

self.learning_rate = learning_config.get("learning_rate", 1e-4)
self.episodes_collecting_initial_experience = learning_config.get(
@@ -437,12 +442,12 @@ def create_actors(self) -> None:
The created actor networks are associated with each unit strategy and stored as attributes.
"""
for _, unit_strategy in self.rl_strats.items():
unit_strategy.actor = Actor(self.obs_dim, self.act_dim, self.float_type).to(
self.device
)
unit_strategy.actor = Actor(
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)

unit_strategy.actor_target = Actor(
self.obs_dim, self.act_dim, self.float_type
obs_dim=self.obs_dim, act_dim=self.act_dim, float_type=self.float_type
).to(self.device)
unit_strategy.actor_target.load_state_dict(unit_strategy.actor.state_dict())
unit_strategy.actor_target.train(mode=False)
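After this change the learning role keeps all tensors in float32 (a configurable `float_type` is only hinted at as a future option) and enables TF32 kernels instead of relying on float16 on CUDA. A condensed sketch of that device and precision setup; `setup_device_and_dtype` and the config handling are illustrative simplifications of the constructor:

```python
import torch as th


def setup_device_and_dtype(learning_config: dict):
    """Pick the torch device and float type roughly the way the updated LearningRole does."""
    requested = learning_config.get("device", "cpu")
    use_cuda = "cuda" in requested and th.cuda.is_available()
    device = th.device(requested if use_cuda else "cpu")

    # learning now always runs in float32; float16 remains a possible future option
    float_type = th.float

    # allow TF32 matmul/cuDNN kernels, which are fast on recent GPUs without float16 issues
    th.backends.cuda.matmul.allow_tf32 = True
    th.backends.cudnn.allow_tf32 = True
    return device, float_type


device, float_type = setup_device_and_dtype({"device": "cuda:0"})
print(device, float_type)  # falls back to cpu when no GPU is available
```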