diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yaml
similarity index 100%
rename from .github/workflows/docker-publish.yml
rename to .github/workflows/docker-publish.yaml
diff --git a/.github/workflows/lint-pytest.yml b/.github/workflows/lint-pytest.yaml
similarity index 100%
rename from .github/workflows/lint-pytest.yml
rename to .github/workflows/lint-pytest.yaml
diff --git a/assume/common/outputs.py b/assume/common/outputs.py
index 42b4dba0..91e6f7b6 100644
--- a/assume/common/outputs.py
+++ b/assume/common/outputs.py
@@ -9,7 +9,7 @@
 from mango import Role
 from pandas.api.types import is_numeric_dtype
 from sqlalchemy import inspect, text
-from sqlalchemy.exc import OperationalError, ProgrammingError
+from sqlalchemy.exc import DataError, OperationalError, ProgrammingError
 
 logger = logging.getLogger(__name__)
 
@@ -43,6 +43,7 @@ def __init__(
         export_csv_path: str = "",
         save_frequency_hours: int = None,
         learning_mode: bool = False,
+        evaluation_mode: bool = False,
     ):
         super().__init__()
 
@@ -56,16 +57,23 @@ def __init__(
             self.p = Path(self.export_csv_path, simulation_id)
             shutil.rmtree(self.p, ignore_errors=True)
             self.p.mkdir(parents=True)
 
+        self.db = db_engine
+        self.learning_mode = learning_mode
+        self.evaluation_mode = evaluation_mode
 
-        # learning
+        # get episode number if in learning or evaluation mode
         self.episode = None
-        if self.learning_mode:
+        if self.learning_mode or self.evaluation_mode:
             episode = self.simulation_id.split("_")[-1]
             if episode.isdigit():
                 self.episode = int(episode)
 
+            # check if episode=0 and delete all similar runs
+            if self.episode == 0:
+                self.del_similar_runs()
+
         # contruct all timeframe under which hourly values are written to excel and db
         self.start = start
         self.end = end
@@ -99,7 +107,7 @@ def delete_db_scenario(self, simulation_id):
                 logger.debug("deleted %s rows from %s", rowcount, table_name)
 
     def del_similar_runs(self):
-        query = text("select distinct simulation from market_meta")
+        query = text("select distinct simulation from rl_params")
 
         try:
             with self.db.begin() as db:
@@ -173,10 +181,12 @@ def write_rl_params(self, rl_params):
         df = pd.DataFrame.from_records(rl_params, index="datetime")
         if df.empty:
             return
+
         df["simulation"] = self.simulation_id
         df["learning_mode"] = self.learning_mode
-        # get characters after last "_" of simulation id string
+        df["evaluation_mode"] = self.evaluation_mode
         df["episode"] = self.episode
+
         self.write_dfs["rl_params"].append(df)
 
     def write_market_results(self, market_meta):
@@ -357,7 +367,7 @@ async def on_stop(self):
         for query in queries:
             try:
                 df = pd.read_sql(query, self.db)
-            except (OperationalError, ProgrammingError):
+            except (OperationalError, DataError):
                 continue
             except Exception as e:
                 logger.error("could not read query: %s", e)
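
For orientation, a minimal sketch (not part of the patch) of the episode bookkeeping `WriteOutput` now performs: the episode number is recovered from the trailing `_<n>` of the simulation id, and a parsed episode of 0 triggers `del_similar_runs()`, which now looks up previous runs in `rl_params` instead of `market_meta`. The helper name `parse_episode` is hypothetical; the role does this parsing inline in `__init__`.

```python
from typing import Optional


def parse_episode(simulation_id: str) -> Optional[int]:
    """Hypothetical helper mirroring the inline parsing in WriteOutput.__init__."""
    suffix = simulation_id.split("_")[-1]  # characters after the last "_"
    return int(suffix) if suffix.isdigit() else None


assert parse_episode("example_01_rl_base_3") == 3       # training episode 3
assert parse_episode("example_01_rl_base_eval_2") == 2  # evaluation episode 2
assert parse_episode("example_01_rl_base") is None      # plain run, no episode suffix
```
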
diff --git a/assume/common/scenario_loader.py b/assume/common/scenario_loader.py
index eb4e6842..a92ac824 100644
--- a/assume/common/scenario_loader.py
+++ b/assume/common/scenario_loader.py
@@ -230,8 +230,10 @@ async def load_scenario_folder_async(
     inputs_path: str,
     scenario: str,
     study_case: str,
-    disable_learning: bool = False,
+    perform_learning: bool = True,
+    perform_evaluation: bool = False,
     episode: int = 0,
+    eval_episode: int = 0,
     load_learned_path: str = "",
 ):
     """Load a scenario from a given path. Raises: ValueError: If the scenario or study case is not found.
@@ -248,11 +250,10 @@ async def load_scenario_folder_async(
     # load the config file
     path = f"{inputs_path}/{scenario}"
-    with open(f"{path}/config.yaml", "r") as f:
-        config = yaml.safe_load(f)
-        if not study_case:
-            study_case = list(config.keys())[0]
-        config = config[study_case]
+    config = yaml.safe_load(open(f"{path}/config.yaml", "r"))
+    if not study_case:
+        study_case = list(config.keys())[0]
+    config = config[study_case]
 
     logger.info(f"Starting Scenario {scenario}/{study_case} from {inputs_path}")
     world.reset()
@@ -298,8 +299,10 @@ async def load_scenario_folder_async(
     learning_config: LearningConfig = config.get("learning_config", {})
     bidding_strategy_params = config.get("bidding_strategy_params", {})
 
-    if disable_learning:
-        learning_config["learning_mode"] = False
+    learning_config["learning_mode"] = (
+        config.get("learning_mode", False) and perform_learning
+    )
+    learning_config["evaluation_mode"] = perform_evaluation
 
     if "load_learned_path" not in learning_config.keys():
         if load_learned_path:
@@ -312,6 +315,9 @@ async def load_scenario_folder_async(
     if learning_config.get("learning_mode", False):
         sim_id = f"{sim_id}_{episode}"
 
+    if learning_config.get("evaluation_mode", False):
+        sim_id = f"{sim_id}_eval_{eval_episode}"
+
     # add forecast provider
     logger.info("Adding forecast")
     forecaster = CsvForecaster(
@@ -515,8 +521,10 @@ def load_scenario_folder(
     inputs_path: str,
     scenario: str,
     study_case: str,
-    disable_learning: bool = False,
+    perform_learning: bool = True,
+    perform_evaluation: bool = False,
     episode: int = 0,
+    eval_episode: int = 0,
     load_learned_path="",
 ):
     """
@@ -537,8 +545,10 @@ def load_scenario_folder(
             inputs_path=inputs_path,
             scenario=scenario,
             study_case=study_case,
-            disable_learning=disable_learning,
+            perform_learning=perform_learning,
+            perform_evaluation=perform_evaluation,
             episode=episode,
+            eval_episode=eval_episode,
             load_learned_path=load_learned_path,
         )
     )
@@ -564,20 +574,22 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
     actors_and_critics = None
     world.output_role.del_similar_runs()
+    eval_episode = 1
 
     for episode in tqdm(
-        range(world.learning_role.training_episodes),
+        range(1, world.learning_role.training_episodes + 1),
         desc="Training Episodes",
     ):
         # TODO normally, loading twice should not create issues, somehow a scheduling issue is raised currently
-        if episode:
+        if episode != 1:
             load_scenario_folder(
                 world,
                 inputs_path,
                 scenario,
                 study_case,
+                perform_learning=True,
                 episode=episode,
-                disable_learning=False,
             )
+
             # give the newly created rl_agent the buffer that we stored from the beginning
             world.learning_role.create_actors_and_critics(
                 actors_and_critics=actors_and_critics
@@ -585,7 +597,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
             world.learning_role.buffer = buffer
             world.learning_role.episodes_done = episode
 
-        if episode + 1 > world.learning_role.episodes_collecting_initial_experience:
+        if episode > world.learning_role.episodes_collecting_initial_experience:
             world.learning_role.turn_off_initial_exploration()
 
         world.run()
@@ -594,9 +606,10 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
             world.learning_role.training_episodes,
             world.learning_config.get("validation_episodes_interval", 5),
         )
 
-        if (episode + 1) % validation_interval == 0 and (
-            episode + 1
-        ) > world.learning_role.episodes_collecting_initial_experience:
+        if (
+            episode % validation_interval == 0
+            and episode > world.learning_role.episodes_collecting_initial_experience
+        ):
            old_path = world.learning_config["load_learned_path"]
            new_path = f"{old_path}_eval"  # save validation params in validation path
@@ -609,7 +622,9 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
                 inputs_path,
                 scenario,
                 study_case,
-                disable_learning=True,
+                perform_learning=False,
+                perform_evaluation=True,
+                eval_episode=eval_episode,
                 load_learned_path=new_path,
             )
             world.run()
@@ -620,13 +635,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
                 # save new best params for simulation
                 best_reward = avg_reward
                 world.learning_role.save_params(directory=old_path)
+
+            eval_episode += 1
+
         world.reset()
 
         # in load_scenario_folder_async, we initiate new container and kill old if present
         # as long as we do not skip setup container should be handled correctly
 
         # if enough initial experience was collected according to specifications in learning config
         # turn off initial exploration and go into full learning mode
-        if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
+        if episode >= world.learning_role.episodes_collecting_initial_experience:
             world.learning_role.turn_off_initial_exploration()
 
         # container shutdown implicitly with new initialisation
@@ -640,5 +658,5 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
         inputs_path,
         scenario,
         study_case,
-        disable_learning=True,
+        perform_learning=False,
     )
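
A condensed sketch (not part of the patch, numbers are illustrative) of the episode numbering `run_learning` now uses: training episodes are 1-based, evaluation runs keep their own 1-based counter, and evaluation only starts once the initial-experience episodes have been collected.

```python
# Assumed values for illustration; the real ones come from the learning config.
training_episodes = 10
validation_interval = min(training_episodes, 5)  # validation_episodes_interval
initial_experience = 2                           # episodes_collecting_initial_experience

eval_episode = 1
for episode in range(1, training_episodes + 1):
    # training run writes to simulation id f"{sim_id}_{episode}"
    if episode % validation_interval == 0 and episode > initial_experience:
        # evaluation run writes to f"{sim_id}_eval_{eval_episode}"
        eval_episode += 1

# here: episodes 5 and 10 trigger the evaluation runs "_eval_1" and "_eval_2"
```
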
diff --git a/assume/common/units_operator.py b/assume/common/units_operator.py
index b2dafa96..1541ffea 100644
--- a/assume/common/units_operator.py
+++ b/assume/common/units_operator.py
@@ -398,7 +398,7 @@ def write_learning_to_output(self, start: datetime, marketconfig: MarketConfig):
         db_aid = self.context.data_dict.get("learning_output_agent_id")
         db_addr = self.context.data_dict.get("learning_output_agent_addr")
 
-        if db_aid and db_addr:
+        if db_aid and db_addr and output_agent_list:
             self.context.schedule_instant_acl_message(
                 receiver_id=db_aid,
                 receiver_addr=db_addr,
diff --git a/assume/world.py b/assume/world.py
index 1a0066c8..d4df257e 100644
--- a/assume/world.py
+++ b/assume/world.py
@@ -143,8 +143,11 @@ async def setup(
 
     async def setup_learning(self):
         self.bidding_params.update(self.learning_config)
 
+        # initiate learning if the learning mode is on and hence we want to learn new strategies
         self.learning_mode = self.learning_config.get("learning_mode", False)
+        self.evaluation_mode = self.learning_config.get("evaluation_mode", False)
+
         if self.learning_mode:
             # if so, we initate the rl learning role with parameters
             from assume.reinforcement_learning.learning_role import Learning
@@ -182,6 +185,7 @@ async def setup_output_agent(self, simulation_id: str, save_frequency_hours: int
             export_csv_path=self.export_csv_path,
             save_frequency_hours=save_frequency_hours,
             learning_mode=self.learning_mode,
+            evaluation_mode=self.evaluation_mode,
         )
         if self.same_process:
             output_agent = RoleAgent(
@@ -325,14 +329,14 @@ def add_market_operator(
         market_operator_agent.markets = []
 
         # after creation of an agent - we set additional context params
-        market_operator_agent._role_context.data_dict = {
-            "output_agent_addr": None
-            if self.learning_mode
-            else self.output_agent_addr[0],
-            "output_agent_id": None
-            if self.learning_mode
-            else self.output_agent_addr[1],
-        }
+        market_operator_agent._role_context.data_dict = {}
+        if not self.learning_mode and not self.evaluation_mode:
+            market_operator_agent._role_context.data_dict.update(
+                {
+                    "output_agent_addr": self.output_agent_addr[0],
+                    "output_agent_id": self.output_agent_addr[1],
+                }
+            )
         self.market_operators[id] = market_operator_agent
 
     def add_market(
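
A small sketch of what the `add_market_operator` change above amounts to (assumed summary, placeholder address and id): market operators only get the general output agent when neither learning nor evaluation mode is active, so training and evaluation runs skip the regular market tables while learning data still flows through the separate `learning_output_agent_*` entries used in `units_operator.py`.

```python
def wire_output_agent(learning_mode: bool, evaluation_mode: bool) -> dict:
    """Illustrative only; World.add_market_operator fills the role context directly."""
    data_dict = {}
    if not learning_mode and not evaluation_mode:
        data_dict["output_agent_addr"] = ("localhost", 9099)  # placeholder address
        data_dict["output_agent_id"] = "export_agent_1"       # placeholder agent id
    return data_dict


assert wire_output_agent(True, False) == {}   # training run: no market output agent
assert wire_output_agent(False, True) == {}   # evaluation run: no market output agent
assert wire_output_agent(False, False) != {}  # normal run: output agent wired in
```
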
"output_agent_id": self.output_agent_addr[1], + } + ) self.market_operators[id] = market_operator_agent def add_market( diff --git a/docker_configs/dashboard-definitions/ASSUME_Learning.json b/docker_configs/dashboard-definitions/ASSUME_Learning.json index 1357d080..68cb429d 100644 --- a/docker_configs/dashboard-definitions/ASSUME_Learning.json +++ b/docker_configs/dashboard-definitions/ASSUME_Learning.json @@ -361,7 +361,199 @@ ] } ], - "title": "Average Reward of all RL Unit ", + "title": "Average reward of all learning units", + "transformations": [ + { + "id": "calculateField", + "options": { + "binary": { + "left": "index", + "reducer": "sum", + "right": "episode" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "episode": { + "aggregations": [], + "operation": "groupby" + }, + "index episode": { + "aggregations": [], + "operation": "aggregate" + }, + "reward": { + "aggregations": [ + "mean" + ], + "operation": "aggregate" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "episode" + } + ] + } + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "postgres", + "uid": "P7B13B9DF907EC40C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 35, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "always", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xField": "episode", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 100 + }, + "pluginVersion": "9.2.15", + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "P7B13B9DF907EC40C" + }, + "format": "table", + "group": [], + "metricColumn": "simulation", + "rawQuery": true, + "rawSql": "SELECT\n datetime,\n simulation,\n unit,\n episode,\n reward\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'\nand evaluation_mode is true", + "refId": "A", + "select": [ + [ + { + "params": [ + "reward" + ], + "type": "column" + }, + { + "params": [ + "avg", + "24" + ], + "type": "moving_window" + }, + { + "params": [ + "avg_reward" + ], + "type": "alias" + } + ], + [ + { + "params": [ + "reward" + ], + "type": "column" + }, + { + "params": [ + "avg", + "24" + ], + "type": "moving_window" + }, + { + "params": [ + "avg_reward" + ], + "type": "alias" + } + ] + ], + "table": "rl_params", + "timeColumn": "index", + "timeColumnType": "timestamp", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Average evaluation reward of all learning units", "transformations": [ { "id": "calculateField", @@ -506,7 +698,7 @@ "h": 15, "w": 24, "x": 0, - "y": 21 + "y": 29 }, "id": 14, "options": { @@ -624,7 +816,7 @@ "h": 11, "w": 24, "x": 0, - "y": 36 + "y": 44 }, "id": 22, 
"options": { @@ -737,7 +929,7 @@ "h": 8, "w": 24, "x": 0, - "y": 47 + "y": 55 }, "id": 24, "options": { @@ -826,7 +1018,7 @@ "h": 9, "w": 24, "x": 0, - "y": 55 + "y": 63 }, "id": 6, "options": { @@ -884,7 +1076,7 @@ "h": 1, "w": 24, "x": 0, - "y": 64 + "y": 72 }, "id": 16, "panels": [], @@ -955,7 +1147,7 @@ "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 73 }, "id": 31, "options": { @@ -1163,7 +1355,7 @@ "h": 16, "w": 12, "x": 12, - "y": 65 + "y": 73 }, "id": 33, "options": { @@ -1383,7 +1575,7 @@ "h": 8, "w": 12, "x": 0, - "y": 73 + "y": 81 }, "id": 32, "options": { @@ -1594,7 +1786,7 @@ "h": 8, "w": 12, "x": 0, - "y": 81 + "y": 89 }, "id": 29, "options": { @@ -1708,7 +1900,7 @@ "h": 8, "w": 12, "x": 12, - "y": 81 + "y": 89 }, "id": 34, "options": { @@ -1806,8 +1998,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1822,7 +2013,7 @@ "h": 8, "w": 12, "x": 0, - "y": 89 + "y": 97 }, "id": 26, "options": { @@ -1920,8 +2111,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1936,7 +2126,7 @@ "h": 8, "w": 12, "x": 12, - "y": 89 + "y": 97 }, "id": 30, "options": { @@ -1998,26 +2188,22 @@ "list": [ { "current": { - "selected": true, - "text": [ - "example_01_rl_base" - ], - "value": [ - "example_01_rl_base" - ] + "selected": false, + "text": "example_01_rl_base", + "value": "example_01_rl_base" }, "datasource": { "type": "postgres", "uid": "P7B13B9DF907EC40C" }, - "definition": "SELECT DISTINCT\n SUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\n FROM \n rl_params m", + "definition": "SELECT DISTINCT\nSUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\nFROM rl_params m", "description": "Can choose which simulation we want to show ", "hide": 0, "includeAll": false, "multi": false, "name": "simulation", "options": [], - "query": "SELECT DISTINCT\n SUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\n FROM \n rl_params m", + "query": "SELECT DISTINCT\nSUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\nFROM rl_params m", "refresh": 2, "regex": "", "skipUrlSync": false, @@ -2034,7 +2220,7 @@ "type": "postgres", "uid": "P7B13B9DF907EC40C" }, - "definition": "SELECT distinct unit\nFROM rl_params\nwhere simulation like ${simulation} || '_%'", + "definition": "SELECT DISTINCT unit\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'", "description": "All units that have an reinforcment learning strategy and hence have the Rl specific parameteres logged", "hide": 0, "includeAll": false, @@ -2042,7 +2228,7 @@ "multi": false, "name": "rl_unit", "options": [], - "query": "SELECT distinct unit\nFROM rl_params\nwhere simulation like ${simulation} || '_%'", + "query": "SELECT DISTINCT unit\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'", "refresh": 2, "regex": "", "skipUrlSync": false, @@ -2053,7 +2239,7 @@ }, "time": { "from": "2018-12-31T23:00:00.000Z", - "to": "2019-01-05T22:59:59.000Z" + "to": "2019-01-31T22:59:59.000Z" }, "timepicker": { "refresh_intervals": [ @@ -2068,6 +2254,6 @@ "timezone": "", "title": "Assume: Training progress", "uid": "JKQzx0q4k", - "version": 6, + "version": 7, "weekStart": "" } diff --git a/examples/inputs/example_01_rl/config.yaml b/examples/inputs/example_01_rl/config.yaml index 7142c9fd..107291d7 
diff --git a/examples/inputs/example_01_rl/config.yaml b/examples/inputs/example_01_rl/config.yaml
index 7142c9fd..107291d7 100644
--- a/examples/inputs/example_01_rl/config.yaml
+++ b/examples/inputs/example_01_rl/config.yaml
@@ -1,8 +1,9 @@
 base:
   start_date: 2019-01-01 00:00
-  end_date: 2019-02-01 00:00
+  end_date: 2019-01-10 00:00
   time_step: 1h
   save_frequency_hours: Null
+  learning_mode: True
 
   learning_config:
     observation_dimension: 50
@@ -10,7 +11,6 @@ base:
     continue_learning: False
     load_model_path: None
     max_bid_price: 100
-    learning_mode: True
     algorithm: matd3
     learning_rate: 0.001
     training_episodes: 30
@@ -23,7 +23,7 @@ base:
     noise_sigma: 0.1
     noise_scale: 1
     noise_dt: 1
-    validation_episodes_interval: 10
+    validation_episodes_interval: 5
 
   markets_config:
     EOM:
@@ -48,6 +48,7 @@ tiny:
   end_date: 2019-01-02 00:00
   time_step: 1h
   save_frequency_hours: 24
+  learning_mode: True
 
   learning_config:
     observation_dimension: 50
@@ -55,7 +56,6 @@ tiny:
     continue_learning: False
     load_model_path: None
     max_bid_price: 100
-    learning_mode: True
     algorithm: matd3
     learning_rate: 0.001
     training_episodes: 3
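
With `learning_mode` promoted to a top-level study-case setting, the loader derives the effective flag as `config.get("learning_mode", False) and perform_learning`. A hedged usage sketch; the exact CLI wiring is not part of this diff and the `World` constructor arguments are placeholders:

```python
from assume.world import World
from assume.common.scenario_loader import load_scenario_folder, run_learning

world = World(database_uri="postgresql://assume:assume@localhost:5432/assume")  # placeholder URI
load_scenario_folder(world, inputs_path="examples/inputs", scenario="example_01_rl", study_case="base")

if world.learning_mode:
    # training episodes plus periodic evaluation runs; run_learning reloads the
    # scenario one last time with perform_learning=False before returning
    run_learning(world, inputs_path="examples/inputs", scenario="example_01_rl", study_case="base")

world.run()
```
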