diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yaml
similarity index 100%
rename from .github/workflows/docker-publish.yml
rename to .github/workflows/docker-publish.yaml
diff --git a/.github/workflows/lint-pytest.yml b/.github/workflows/lint-pytest.yaml
similarity index 100%
rename from .github/workflows/lint-pytest.yml
rename to .github/workflows/lint-pytest.yaml
diff --git a/assume/common/outputs.py b/assume/common/outputs.py
index 42b4dba0..91e6f7b6 100644
--- a/assume/common/outputs.py
+++ b/assume/common/outputs.py
@@ -9,7 +9,7 @@
 from mango import Role
 from pandas.api.types import is_numeric_dtype
 from sqlalchemy import inspect, text
-from sqlalchemy.exc import OperationalError, ProgrammingError
+from sqlalchemy.exc import DataError, OperationalError, ProgrammingError
 
 logger = logging.getLogger(__name__)
 
@@ -43,6 +43,7 @@ def __init__(
         export_csv_path: str = "",
         save_frequency_hours: int = None,
         learning_mode: bool = False,
+        evaluation_mode: bool = False,
     ):
         super().__init__()
 
@@ -56,16 +57,23 @@ def __init__(
             self.p = Path(self.export_csv_path, simulation_id)
             shutil.rmtree(self.p, ignore_errors=True)
             self.p.mkdir(parents=True)
 
+        self.db = db_engine
+        self.learning_mode = learning_mode
+        self.evaluation_mode = evaluation_mode
 
-        # learning
+        # get episode number if in learning or evaluation mode
         self.episode = None
-        if self.learning_mode:
+        if self.learning_mode or self.evaluation_mode:
             episode = self.simulation_id.split("_")[-1]
             if episode.isdigit():
                 self.episode = int(episode)
 
+            # check if episode=0 and delete all similar runs
+            if self.episode == 0:
+                self.del_similar_runs()
+
         # contruct all timeframe under which hourly values are written to excel and db
         self.start = start
         self.end = end
@@ -99,7 +107,7 @@ def delete_db_scenario(self, simulation_id):
                 logger.debug("deleted %s rows from %s", rowcount, table_name)
 
     def del_similar_runs(self):
-        query = text("select distinct simulation from market_meta")
+        query = text("select distinct simulation from rl_params")
 
         try:
             with self.db.begin() as db:
@@ -173,10 +181,12 @@ def write_rl_params(self, rl_params):
         df = pd.DataFrame.from_records(rl_params, index="datetime")
         if df.empty:
             return
+
         df["simulation"] = self.simulation_id
         df["learning_mode"] = self.learning_mode
-        # get characters after last "_" of simulation id string
+        df["evaluation_mode"] = self.evaluation_mode
         df["episode"] = self.episode
+
         self.write_dfs["rl_params"].append(df)
 
     def write_market_results(self, market_meta):
@@ -357,7 +367,7 @@ async def on_stop(self):
         for query in queries:
             try:
                 df = pd.read_sql(query, self.db)
-            except (OperationalError, ProgrammingError):
+            except (OperationalError, DataError):
                 continue
             except Exception as e:
                 logger.error("could not read query: %s", e)
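
For orientation, a minimal sketch (not part of the patch) of the episode bookkeeping `WriteOutput` now performs: the episode number is recovered from the trailing `_<n>` of the simulation id, and a parsed episode of 0 triggers `del_similar_runs()`, which now looks up previous runs in `rl_params` instead of `market_meta`. The helper name `parse_episode` is hypothetical; the role does this parsing inline in `__init__`.

```python
from typing import Optional


def parse_episode(simulation_id: str) -> Optional[int]:
    """Hypothetical helper mirroring the inline parsing in WriteOutput.__init__."""
    suffix = simulation_id.split("_")[-1]  # characters after the last "_"
    return int(suffix) if suffix.isdigit() else None


assert parse_episode("example_01_rl_base_3") == 3       # training episode 3
assert parse_episode("example_01_rl_base_eval_2") == 2  # evaluation episode 2
assert parse_episode("example_01_rl_base") is None      # plain run, no episode suffix
```
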
diff --git a/assume/common/scenario_loader.py b/assume/common/scenario_loader.py
index eb4e6842..a92ac824 100644
--- a/assume/common/scenario_loader.py
+++ b/assume/common/scenario_loader.py
@@ -230,8 +230,10 @@ async def load_scenario_folder_async(
     inputs_path: str,
     scenario: str,
     study_case: str,
-    disable_learning: bool = False,
+    perform_learning: bool = True,
+    perform_evaluation: bool = False,
     episode: int = 0,
+    eval_episode: int = 0,
     load_learned_path: str = "",
 ):
     """Load a scenario from a given path. Raises: ValueError: If the scenario or study case is not found.
@@ -248,11 +250,10 @@ async def load_scenario_folder_async(
     # load the config file
     path = f"{inputs_path}/{scenario}"
-    with open(f"{path}/config.yaml", "r") as f:
-        config = yaml.safe_load(f)
-        if not study_case:
-            study_case = list(config.keys())[0]
-        config = config[study_case]
+    config = yaml.safe_load(open(f"{path}/config.yaml", "r"))
+    if not study_case:
+        study_case = list(config.keys())[0]
+    config = config[study_case]
 
     logger.info(f"Starting Scenario {scenario}/{study_case} from {inputs_path}")
     world.reset()
@@ -298,8 +299,10 @@ async def load_scenario_folder_async(
     learning_config: LearningConfig = config.get("learning_config", {})
     bidding_strategy_params = config.get("bidding_strategy_params", {})
 
-    if disable_learning:
-        learning_config["learning_mode"] = False
+    learning_config["learning_mode"] = (
+        config.get("learning_mode", False) and perform_learning
+    )
+    learning_config["evaluation_mode"] = perform_evaluation
 
     if "load_learned_path" not in learning_config.keys():
         if load_learned_path:
@@ -312,6 +315,9 @@ async def load_scenario_folder_async(
     if learning_config.get("learning_mode", False):
         sim_id = f"{sim_id}_{episode}"
 
+    if learning_config.get("evaluation_mode", False):
+        sim_id = f"{sim_id}_eval_{eval_episode}"
+
     # add forecast provider
     logger.info("Adding forecast")
     forecaster = CsvForecaster(
@@ -515,8 +521,10 @@ def load_scenario_folder(
     inputs_path: str,
     scenario: str,
     study_case: str,
-    disable_learning: bool = False,
+    perform_learning: bool = True,
+    perform_evaluation: bool = False,
     episode: int = 0,
+    eval_episode: int = 0,
     load_learned_path="",
 ):
     """
@@ -537,8 +545,10 @@ def load_scenario_folder(
             inputs_path=inputs_path,
             scenario=scenario,
             study_case=study_case,
-            disable_learning=disable_learning,
+            perform_learning=perform_learning,
+            perform_evaluation=perform_evaluation,
             episode=episode,
+            eval_episode=eval_episode,
             load_learned_path=load_learned_path,
         )
     )
@@ -564,20 +574,22 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
     actors_and_critics = None
     world.output_role.del_similar_runs()
+    eval_episode = 1
 
     for episode in tqdm(
-        range(world.learning_role.training_episodes),
+        range(1, world.learning_role.training_episodes + 1),
         desc="Training Episodes",
     ):
         # TODO normally, loading twice should not create issues, somehow a scheduling issue is raised currently
-        if episode:
+        if episode != 1:
             load_scenario_folder(
                 world,
                 inputs_path,
                 scenario,
                 study_case,
+                perform_learning=True,
                 episode=episode,
-                disable_learning=False,
             )
+
             # give the newly created rl_agent the buffer that we stored from the beginning
             world.learning_role.create_actors_and_critics(
                 actors_and_critics=actors_and_critics
@@ -585,7 +597,7 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
             world.learning_role.buffer = buffer
             world.learning_role.episodes_done = episode
 
-        if episode + 1 > world.learning_role.episodes_collecting_initial_experience:
+        if episode > world.learning_role.episodes_collecting_initial_experience:
             world.learning_role.turn_off_initial_exploration()
 
         world.run()
@@ -594,9 +606,10 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
             world.learning_role.training_episodes,
             world.learning_config.get("validation_episodes_interval", 5),
         )
 
-        if (episode + 1) % validation_interval == 0 and (
-            episode + 1
-        ) > world.learning_role.episodes_collecting_initial_experience:
+        if (
+            episode % validation_interval == 0
+            and episode > world.learning_role.episodes_collecting_initial_experience
+        ):
            old_path = world.learning_config["load_learned_path"]
            new_path = f"{old_path}_eval"  # save validation params in validation path
@@ -609,7 +622,9 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
                 inputs_path,
                 scenario,
                 study_case,
-                disable_learning=True,
+                perform_learning=False,
+                perform_evaluation=True,
+                eval_episode=eval_episode,
                 load_learned_path=new_path,
             )
             world.run()
@@ -620,13 +635,16 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
                 # save new best params for simulation
                 best_reward = avg_reward
                 world.learning_role.save_params(directory=old_path)
+
+            eval_episode += 1
+
         world.reset()
 
         # in load_scenario_folder_async, we initiate new container and kill old if present
         # as long as we do not skip setup container should be handled correctly
 
         # if enough initial experience was collected according to specifications in learning config
         # turn off initial exploration and go into full learning mode
-        if episode + 1 >= world.learning_role.episodes_collecting_initial_experience:
+        if episode >= world.learning_role.episodes_collecting_initial_experience:
             world.learning_role.turn_off_initial_exploration()
 
         # container shutdown implicitly with new initialisation
@@ -640,5 +658,5 @@ def run_learning(world: World, inputs_path: str, scenario: str, study_case: str)
         inputs_path,
         scenario,
         study_case,
-        disable_learning=True,
+        perform_learning=False,
     )
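
A condensed sketch (not part of the patch, numbers are illustrative) of the episode numbering `run_learning` now uses: training episodes are 1-based, evaluation runs keep their own 1-based counter, and evaluation only starts once the initial-experience episodes have been collected.

```python
# Assumed values for illustration; the real ones come from the learning config.
training_episodes = 10
validation_interval = min(training_episodes, 5)  # validation_episodes_interval
initial_experience = 2                           # episodes_collecting_initial_experience

eval_episode = 1
for episode in range(1, training_episodes + 1):
    # training run writes to simulation id f"{sim_id}_{episode}"
    if episode % validation_interval == 0 and episode > initial_experience:
        # evaluation run writes to f"{sim_id}_eval_{eval_episode}"
        eval_episode += 1

# here: episodes 5 and 10 trigger the evaluation runs "_eval_1" and "_eval_2"
```
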
diff --git a/assume/common/units_operator.py b/assume/common/units_operator.py
index b2dafa96..1541ffea 100644
--- a/assume/common/units_operator.py
+++ b/assume/common/units_operator.py
@@ -398,7 +398,7 @@ def write_learning_to_output(self, start: datetime, marketconfig: MarketConfig):
         db_aid = self.context.data_dict.get("learning_output_agent_id")
         db_addr = self.context.data_dict.get("learning_output_agent_addr")
 
-        if db_aid and db_addr:
+        if db_aid and db_addr and output_agent_list:
             self.context.schedule_instant_acl_message(
                 receiver_id=db_aid,
                 receiver_addr=db_addr,
diff --git a/assume/world.py b/assume/world.py
index 1a0066c8..d4df257e 100644
--- a/assume/world.py
+++ b/assume/world.py
@@ -143,8 +143,11 @@ async def setup(
 
     async def setup_learning(self):
         self.bidding_params.update(self.learning_config)
 
+        # initiate learning if the learning mode is on and hence we want to learn new strategies
         self.learning_mode = self.learning_config.get("learning_mode", False)
+        self.evaluation_mode = self.learning_config.get("evaluation_mode", False)
+
         if self.learning_mode:
             # if so, we initate the rl learning role with parameters
             from assume.reinforcement_learning.learning_role import Learning
@@ -182,6 +185,7 @@ async def setup_output_agent(self, simulation_id: str, save_frequency_hours: int
             export_csv_path=self.export_csv_path,
             save_frequency_hours=save_frequency_hours,
             learning_mode=self.learning_mode,
+            evaluation_mode=self.evaluation_mode,
         )
         if self.same_process:
             output_agent = RoleAgent(
@@ -325,14 +329,14 @@ def add_market_operator(
         market_operator_agent.markets = []
 
         # after creation of an agent - we set additional context params
-        market_operator_agent._role_context.data_dict = {
-            "output_agent_addr": None
-            if self.learning_mode
-            else self.output_agent_addr[0],
-            "output_agent_id": None
-            if self.learning_mode
-            else self.output_agent_addr[1],
-        }
+        market_operator_agent._role_context.data_dict = {}
+        if not self.learning_mode and not self.evaluation_mode:
+            market_operator_agent._role_context.data_dict.update(
+                {
+                    "output_agent_addr": self.output_agent_addr[0],
+                    "output_agent_id": self.output_agent_addr[1],
+                }
+            )
         self.market_operators[id] = market_operator_agent
 
     def add_market(
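
A small sketch of what the `add_market_operator` change above amounts to (assumed summary, placeholder address and id): market operators only get the general output agent when neither learning nor evaluation mode is active, so training and evaluation runs skip the regular market tables while learning data still flows through the separate `learning_output_agent_*` entries used in `units_operator.py`.

```python
def wire_output_agent(learning_mode: bool, evaluation_mode: bool) -> dict:
    """Illustrative only; World.add_market_operator fills the role context directly."""
    data_dict = {}
    if not learning_mode and not evaluation_mode:
        data_dict["output_agent_addr"] = ("localhost", 9099)  # placeholder address
        data_dict["output_agent_id"] = "export_agent_1"       # placeholder agent id
    return data_dict


assert wire_output_agent(True, False) == {}   # training run: no market output agent
assert wire_output_agent(False, True) == {}   # evaluation run: no market output agent
assert wire_output_agent(False, False) != {}  # normal run: output agent wired in
```
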
"output_agent_id": self.output_agent_addr[1], + } + ) self.market_operators[id] = market_operator_agent def add_market( diff --git a/docker_configs/dashboard-definitions/ASSUME_Learning.json b/docker_configs/dashboard-definitions/ASSUME_Learning.json index 1357d080..68cb429d 100644 --- a/docker_configs/dashboard-definitions/ASSUME_Learning.json +++ b/docker_configs/dashboard-definitions/ASSUME_Learning.json @@ -361,7 +361,199 @@ ] } ], - "title": "Average Reward of all RL Unit ", + "title": "Average reward of all learning units", + "transformations": [ + { + "id": "calculateField", + "options": { + "binary": { + "left": "index", + "reducer": "sum", + "right": "episode" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "episode": { + "aggregations": [], + "operation": "groupby" + }, + "index episode": { + "aggregations": [], + "operation": "aggregate" + }, + "reward": { + "aggregations": [ + "mean" + ], + "operation": "aggregate" + } + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "episode" + } + ] + } + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "postgres", + "uid": "P7B13B9DF907EC40C" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 35, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "always", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xField": "episode", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 100 + }, + "pluginVersion": "9.2.15", + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "P7B13B9DF907EC40C" + }, + "format": "table", + "group": [], + "metricColumn": "simulation", + "rawQuery": true, + "rawSql": "SELECT\n datetime,\n simulation,\n unit,\n episode,\n reward\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'\nand evaluation_mode is true", + "refId": "A", + "select": [ + [ + { + "params": [ + "reward" + ], + "type": "column" + }, + { + "params": [ + "avg", + "24" + ], + "type": "moving_window" + }, + { + "params": [ + "avg_reward" + ], + "type": "alias" + } + ], + [ + { + "params": [ + "reward" + ], + "type": "column" + }, + { + "params": [ + "avg", + "24" + ], + "type": "moving_window" + }, + { + "params": [ + "avg_reward" + ], + "type": "alias" + } + ] + ], + "table": "rl_params", + "timeColumn": "index", + "timeColumnType": "timestamp", + "where": [ + { + "name": "$__timeFilter", + "params": [], + "type": "macro" + } + ] + } + ], + "title": "Average evaluation reward of all learning units", "transformations": [ { "id": "calculateField", @@ -506,7 +698,7 @@ "h": 15, "w": 24, "x": 0, - "y": 21 + "y": 29 }, "id": 14, "options": { @@ -624,7 +816,7 @@ "h": 11, "w": 24, "x": 0, - "y": 36 + "y": 44 }, "id": 22, 
"options": { @@ -737,7 +929,7 @@ "h": 8, "w": 24, "x": 0, - "y": 47 + "y": 55 }, "id": 24, "options": { @@ -826,7 +1018,7 @@ "h": 9, "w": 24, "x": 0, - "y": 55 + "y": 63 }, "id": 6, "options": { @@ -884,7 +1076,7 @@ "h": 1, "w": 24, "x": 0, - "y": 64 + "y": 72 }, "id": 16, "panels": [], @@ -955,7 +1147,7 @@ "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 73 }, "id": 31, "options": { @@ -1163,7 +1355,7 @@ "h": 16, "w": 12, "x": 12, - "y": 65 + "y": 73 }, "id": 33, "options": { @@ -1383,7 +1575,7 @@ "h": 8, "w": 12, "x": 0, - "y": 73 + "y": 81 }, "id": 32, "options": { @@ -1594,7 +1786,7 @@ "h": 8, "w": 12, "x": 0, - "y": 81 + "y": 89 }, "id": 29, "options": { @@ -1708,7 +1900,7 @@ "h": 8, "w": 12, "x": 12, - "y": 81 + "y": 89 }, "id": 34, "options": { @@ -1806,8 +1998,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1822,7 +2013,7 @@ "h": 8, "w": 12, "x": 0, - "y": 89 + "y": 97 }, "id": 26, "options": { @@ -1920,8 +2111,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1936,7 +2126,7 @@ "h": 8, "w": 12, "x": 12, - "y": 89 + "y": 97 }, "id": 30, "options": { @@ -1998,26 +2188,22 @@ "list": [ { "current": { - "selected": true, - "text": [ - "example_01_rl_base" - ], - "value": [ - "example_01_rl_base" - ] + "selected": false, + "text": "example_01_rl_base", + "value": "example_01_rl_base" }, "datasource": { "type": "postgres", "uid": "P7B13B9DF907EC40C" }, - "definition": "SELECT DISTINCT\n SUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\n FROM \n rl_params m", + "definition": "SELECT DISTINCT\nSUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\nFROM rl_params m", "description": "Can choose which simulation we want to show ", "hide": 0, "includeAll": false, "multi": false, "name": "simulation", "options": [], - "query": "SELECT DISTINCT\n SUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\n FROM \n rl_params m", + "query": "SELECT DISTINCT\nSUBSTRING(m.simulation, 0, LENGTH(m.simulation) +1 - strpos(REVERSE(m.simulation),'_')) AS market_simulation\nFROM rl_params m", "refresh": 2, "regex": "", "skipUrlSync": false, @@ -2034,7 +2220,7 @@ "type": "postgres", "uid": "P7B13B9DF907EC40C" }, - "definition": "SELECT distinct unit\nFROM rl_params\nwhere simulation like ${simulation} || '_%'", + "definition": "SELECT DISTINCT unit\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'", "description": "All units that have an reinforcment learning strategy and hence have the Rl specific parameteres logged", "hide": 0, "includeAll": false, @@ -2042,7 +2228,7 @@ "multi": false, "name": "rl_unit", "options": [], - "query": "SELECT distinct unit\nFROM rl_params\nwhere simulation like ${simulation} || '_%'", + "query": "SELECT DISTINCT unit\nFROM rl_params\nwhere simulation like '${simulation}' || '_%'", "refresh": 2, "regex": "", "skipUrlSync": false, @@ -2053,7 +2239,7 @@ }, "time": { "from": "2018-12-31T23:00:00.000Z", - "to": "2019-01-05T22:59:59.000Z" + "to": "2019-01-31T22:59:59.000Z" }, "timepicker": { "refresh_intervals": [ @@ -2068,6 +2254,6 @@ "timezone": "", "title": "Assume: Training progress", "uid": "JKQzx0q4k", - "version": 6, + "version": 7, "weekStart": "" } diff --git a/examples/inputs/example_01_rl/config.yaml b/examples/inputs/example_01_rl/config.yaml index 7142c9fd..107291d7 
diff --git a/examples/inputs/example_01_rl/config.yaml b/examples/inputs/example_01_rl/config.yaml
index 7142c9fd..107291d7 100644
--- a/examples/inputs/example_01_rl/config.yaml
+++ b/examples/inputs/example_01_rl/config.yaml
@@ -1,8 +1,9 @@
 base:
   start_date: 2019-01-01 00:00
-  end_date: 2019-02-01 00:00
+  end_date: 2019-01-10 00:00
   time_step: 1h
   save_frequency_hours: Null
+  learning_mode: True
 
   learning_config:
     observation_dimension: 50
@@ -10,7 +11,6 @@ base:
     continue_learning: False
     load_model_path: None
     max_bid_price: 100
-    learning_mode: True
     algorithm: matd3
     learning_rate: 0.001
     training_episodes: 30
@@ -23,7 +23,7 @@ base:
     noise_sigma: 0.1
     noise_scale: 1
     noise_dt: 1
-    validation_episodes_interval: 10
+    validation_episodes_interval: 5
 
   markets_config:
     EOM:
@@ -48,6 +48,7 @@ tiny:
   end_date: 2019-01-02 00:00
   time_step: 1h
   save_frequency_hours: 24
+  learning_mode: True
 
   learning_config:
     observation_dimension: 50
@@ -55,7 +56,6 @@ tiny:
     continue_learning: False
     load_model_path: None
     max_bid_price: 100
-    learning_mode: True
     algorithm: matd3
     learning_rate: 0.001
     training_episodes: 3
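
With `learning_mode` promoted to a top-level study-case setting, the loader derives the effective flag as `config.get("learning_mode", False) and perform_learning`. A hedged usage sketch; the exact CLI wiring is not part of this diff and the `World` constructor arguments are placeholders:

```python
from assume.world import World
from assume.common.scenario_loader import load_scenario_folder, run_learning

world = World(database_uri="postgresql://assume:assume@localhost:5432/assume")  # placeholder URI
load_scenario_folder(world, inputs_path="examples/inputs", scenario="example_01_rl", study_case="base")

if world.learning_mode:
    # training episodes plus periodic evaluation runs; run_learning reloads the
    # scenario one last time with perform_learning=False before returning
    run_learning(world, inputs_path="examples/inputs", scenario="example_01_rl", study_case="base")

world.run()
```
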