Fix simulation runs reporting the wrong learning_mode (#198)

- set the current episode correctly in run_learning
- set learning_mode once, now that we have it in outputs directly
- add simulation_id to the tqdm progress bar description
- remove learning_mode from the per-unit outputs
- fix the dashboard query when accepted_price is not a float (for whatever reason)
assume-framework · Sep 20, 2023 · 163fb56 · 163fb56
1 parent e2af08c
commit 163fb56
Show file tree

Hide file tree

Showing 6 changed files with 5 additions and 5 deletions.
diff --git a/assume/common/outputs.py b/assume/common/outputs.py
@@ -174,6 +174,7 @@ def write_rl_params(self, rl_params):
         if df.empty:
             return
         df["simulation"] = self.simulation_id
+        df["learning_mode"] = self.learning_mode
         # get characters after last "_" of simulation id string
         df["episode"] = self.episode
         self.write_dfs["rl_params"].append(df)

diff --git a/assume/common/scenario_loader.py b/assume/common/scenario_loader.py
@@ -574,7 +574,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
                 inputs_path,
                 scenario,
                 study_case,
-                episode=world.learning_role.episodes_done,
+                episode=episode,
                 disable_learning=False,
             )
         # give the newly created rl_agent the buffer that we stored from the beginning

diff --git a/assume/common/units_operator.py b/assume/common/units_operator.py
@@ -384,7 +384,6 @@ def write_learning_to_output(self, start: datetime, marketconfig: MarketConfig):
                     "profit": unit.outputs["profit"].loc[start],
                     "reward": unit.outputs["reward"].loc[start],
                     "regret": unit.outputs["regret"].loc[start],
-                    "learning_mode": unit.outputs["learning_mode"].loc[start],
                     "unit": unit_id,
                 }
                 noise_tuple = unit.outputs["rl_exploration_noise"].loc[start]

diff --git a/assume/strategies/learning_strategies.py b/assume/strategies/learning_strategies.py
@@ -455,7 +455,6 @@ def calculate_reward(
         unit.outputs["profit"].loc[start:end_excl] += float(profit)
         unit.outputs["reward"].loc[start:end_excl] = reward
         unit.outputs["regret"].loc[start:end_excl] = float(opportunity_cost)
-        unit.outputs["learning_mode"].loc[start:end_excl] = self.learning_mode
 
     def load_actor_params(self, load_path):
         """

diff --git a/assume/world.py b/assume/world.py
@@ -365,7 +365,8 @@ async def async_run(self, start_ts, end_ts):
             if delta:
                 pbar.update(delta)
                 pbar.set_description(
-                    f"{datetime.utcfromtimestamp(self.clock.time)}", refresh=False
+                    f"{self.output_role.simulation_id} {datetime.utcfromtimestamp(self.clock.time)}",
+                    refresh=False,
                 )
             else:
                 self.clock.set_time(end_ts)

diff --git a/docker_configs/dashboard-definitions/ASSUME.json b/docker_configs/dashboard-definitions/ASSUME.json
@@ -422,7 +422,7 @@
           ],
           "metricColumn": "none",
           "rawQuery": true,
-          "rawSql": "SELECT\n  $__timeGroupAlias(start_time,$__interval),\n  avg(accepted_price) AS \" \",\n  bid_id AS \"bid_id\"\nFROM market_orders\nWHERE\n  $__timeFilter(start_time) AND\n  market_id = '$market' AND\n  simulation = '$simulation'\nGROUP BY 1, bid_id\nORDER BY 1",
+          "rawSql": "SELECT\n  $__timeGroupAlias(start_time,$__interval),\n  avg(accepted_price::float) AS \" \",\n  bid_id AS \"bid_id\"\nFROM market_orders\nWHERE\n  $__timeFilter(start_time) AND\n  market_id = '$market' AND\n  simulation = '$simulation'\nGROUP BY 1, bid_id\nORDER BY 1",
           "refId": "A",
           "select": [
             [