From c3829da12a130d04f82812766b570a4caf91692b Mon Sep 17 00:00:00 2001 From: Alessandro Palmas Date: Thu, 21 Sep 2023 00:40:19 -0400 Subject: [PATCH] WIP - wrappers reworking completed, integration test ongoing, (final) next step: settings hints/exposing --- diambra/arena/wrappers/observation.py | 13 ---------- examples/wrappers_options.py | 14 ++++------- tests/test_random.py | 14 ++++++----- tests/test_recording_settings.py | 1 + tests/test_speed.py | 7 +++--- tests/test_wrappers_settings.py | 34 +++++++++++++++++---------- 6 files changed, 39 insertions(+), 44 deletions(-) diff --git a/diambra/arena/wrappers/observation.py b/diambra/arena/wrappers/observation.py index 2191d387..056cee74 100644 --- a/diambra/arena/wrappers/observation.py +++ b/diambra/arena/wrappers/observation.py @@ -328,19 +328,6 @@ def step(self, action): obs, reward, terminated, truncated, info = self.env.step(action) return self._process_obs(obs), reward, terminated, truncated, info - """ - def rename_key_recursive(dictionary, old_key, new_key): - if isinstance(dictionary, dict): - new_dict = {} - for key, value in dictionary.items(): - if key == old_key: - key = new_key - new_dict[key] = rename_key_recursive(value, old_key, new_key) - return new_dict - else: - return dictionary - """ - class FlattenFilterDictObs(gym.ObservationWrapper): def __init__(self, env, filter_keys): gym.ObservationWrapper.__init__(self, env) diff --git a/examples/wrappers_options.py b/examples/wrappers_options.py index 60facc5d..ea8f46fb 100644 --- a/examples/wrappers_options.py +++ b/examples/wrappers_options.py @@ -3,8 +3,7 @@ def main(): # Environment settings - #settings = {"n_players": 1, "action_space": SpaceTypes.MULTI_DISCRETE} - settings = {"n_players": 2, "action_space": (SpaceTypes.MULTI_DISCRETE, SpaceTypes.DISCRETE)} + settings = {"n_players": 1, "action_space": SpaceTypes.MULTI_DISCRETE} # Gym wrappers settings wrappers_settings = {} @@ -61,8 +60,8 @@ def main(): # optionally exclude images from normalization (False by default) # and optionally perform one-hot encoding also on discrete binary variables (False by default) wrappers_settings["scale"] = True - #wrappers_settings["exclude_image_scaling"] = True - #wrappers_settings["process_discrete_binary"] = True + wrappers_settings["exclude_image_scaling"] = True + wrappers_settings["process_discrete_binary"] = False # If to make the observation relative to the agent as a function to its role (P1 or P2) (deactivate by default) # i.e.: @@ -73,14 +72,11 @@ def main(): # - Under "agent_1", "P1" nesting level becomes "opp" and "P2" becomes "own" wrappers_settings["role_relative_observation"] = True - """ # Flattening observation dictionary and filtering # a sub-set of the RAM states wrappers_settings["flatten"] = True - wrappers_settings["filter_keys"] = ["stage", "timer", "agent_0_own_side", "agent_0_opp_side", - "agent_0_own_health", "agent_0_opp_char", - "agent_0_action_move", "agent_0_action_attack"] - """ + wrappers_settings["filter_keys"] = ["stage", "timer", "action", "own_side", "opp_side", + "own_health", "opp_health", "opp_character"] env = diambra.arena.make("doapp", settings, wrappers_settings, render_mode="human") diff --git a/tests/test_random.py b/tests/test_random.py index 53977017..f8dbbc42 100755 --- a/tests/test_random.py +++ b/tests/test_random.py @@ -86,15 +86,16 @@ def test_random_wrappers_mock(game_id, n_players, action_space, mocker): wrappers_settings["clip_rewards"] = False wrappers_settings["frame_stack"] = 4 wrappers_settings["dilation"] = 1 + wrappers_settings["add_last_action_to_observation"] = True wrappers_settings["actions_stack"] = 12 wrappers_settings["scale"] = True - wrappers_settings["scale_mod"] = 0 + wrappers_settings["role_relative_observation"] = True wrappers_settings["flatten"] = True suffix = "" if n_players == 2: suffix = "agent_0_" - wrappers_settings["filter_keys"] = ["stage", "timer", suffix+"own_side", suffix+"opp_side", - suffix+"opp_char", suffix+"action_move", suffix+"action_attack"] + wrappers_settings["filter_keys"] = ["stage", "timer", suffix + "own_side", suffix + "opp_side", + suffix + "opp_character", suffix + "action"] assert func(game_id, n_players, action_space, frame_shape, wrappers_settings, no_action_probability, use_mock_env, mocker) == 0 @@ -115,15 +116,16 @@ def test_random_integration(game_id, n_players, action_space, mocker): wrappers_settings["clip_rewards"] = False wrappers_settings["frame_stack"] = 4 wrappers_settings["dilation"] = 1 + wrappers_settings["add_last_action_to_observation"] = True wrappers_settings["actions_stack"] = 12 wrappers_settings["scale"] = True - wrappers_settings["scale_mod"] = 0 + wrappers_settings["role_relative_observation"] = True wrappers_settings["flatten"] = True suffix = "" if n_players == 2: suffix = "agent_0_" - wrappers_settings["filter_keys"] = ["stage", "timer", suffix+"own_side", suffix+"opp_side", - suffix+"opp_char", suffix+"action_move", suffix+"action_attack"] + wrappers_settings["filter_keys"] = ["stage", "timer", suffix + "own_side", suffix + "opp_side", + suffix + "opp_character", suffix + "action"] assert func(game_id, n_players, action_space, frame_shape, wrappers_settings, no_action_probability, use_mock_env, mocker) == 0 \ No newline at end of file diff --git a/tests/test_recording_settings.py b/tests/test_recording_settings.py index 2bef28d8..83171942 100644 --- a/tests/test_recording_settings.py +++ b/tests/test_recording_settings.py @@ -66,6 +66,7 @@ def test_settings_recording(game_id ,username, dataset_path, n_players, action_s wrappers_settings["frame_shape"] = (128, 128, 1) wrappers_settings["reward_normalization"] = True wrappers_settings["frame_stack"] = 4 + wrappers_settings["add_last_action_to_observation"] = True wrappers_settings["actions_stack"] = 12 wrappers_settings["scale"] = True diff --git a/tests/test_speed.py b/tests/test_speed.py index b7aa7707..41fb0a92 100644 --- a/tests/test_speed.py +++ b/tests/test_speed.py @@ -76,15 +76,16 @@ def test_speed_wrappers(n_players, mocker): wrappers_settings["clip_rewards"] = False wrappers_settings["frame_stack"] = 4 wrappers_settings["dilation"] = 1 + wrappers_settings["add_last_action_to_observation"] = True wrappers_settings["actions_stack"] = 12 wrappers_settings["scale"] = True - wrappers_settings["scale_mod"] = 0 + wrappers_settings["role_relative_observation"] = True wrappers_settings["flatten"] = True suffix = "" if n_players == 2: suffix = "agent_0_" - wrappers_settings["filter_keys"] = ["stage", "timer", suffix+"own_side", suffix+"opp_side", suffix+"opp_side", - suffix+"opp_char", suffix+"action_move", suffix+"action_attack"] + wrappers_settings["filter_keys"] = ["stage", "timer", suffix + "own_side", suffix + "opp_side", + suffix + "opp_character", suffix + "action"] assert func(n_players, wrappers_settings, target_speeds[1], mocker) == 0 diff --git a/tests/test_wrappers_settings.py b/tests/test_wrappers_settings.py index 7a02b254..9a19ad3f 100644 --- a/tests/test_wrappers_settings.py +++ b/tests/test_wrappers_settings.py @@ -26,41 +26,46 @@ def func(settings, wrappers_settings, episode_recording_settings, mocker): print("ERROR, ABORTED.") return 1 -wrappers_settings_var_order = ["no_attack_buttons_combinations", "no_op_max", "sticky_actions", "frame_shape", "reward_normalization", - "reward_normalization_factor", "clip_rewards", "frame_stack", "dilation", - "actions_stack", "scale", "flatten", "filter_keys", "wrappers"] +wrappers_settings_var_order = ["no_op_max", "sticky_actions", "reward_normalization", "reward_normalization_factor", + "clip_rewards", "no_attack_buttons_combinations", "frame_shape", "frame_stack", "dilation", + "add_last_action_to_observation", "actions_stack", "scale", "role_relative_observation", + "flatten", "filter_keys", "wrappers"] games_dict = available_games(False) ok_test_parameters = { - "no_attack_buttons_combinations": [True, False], "no_op_max": [0, 2], "sticky_actions": [1, 4], - "frame_shape": [(0, 0, 0), (84, 84, 1), (84, 84, 3), (84, 84, 0)], "reward_normalization": [True, False], "reward_normalization_factor": [0.2, 0.5], "clip_rewards": [True, False], + "no_attack_buttons_combinations": [True, False], + "frame_shape": [(0, 0, 0), (84, 84, 1), (84, 84, 3), (84, 84, 0)], "frame_stack": [1, 5], "dilation": [1, 3], + "add_last_action_to_observation": [True, False], "actions_stack": [1, 6], "scale": [True, False], + "role_relative_observation": [True, False], "flatten": [True, False], "filter_keys": [[], ["stage", "own_side"]], "wrappers": [[]], } ko_test_parameters = { - "no_attack_buttons_combinations": [-1], "no_op_max": [-1], "sticky_actions": [True], - "frame_shape": [(0, 84, 3), (128, 0, 1)], "reward_normalization": ["True"], "reward_normalization_factor": [-10], "clip_rewards": [0.5], + "no_attack_buttons_combinations": [-1], + "frame_shape": [(0, 84, 3), (128, 0, 1)], "frame_stack": [0], "dilation": [0], + "add_last_action_to_observation": [10], "actions_stack": [-2], "scale": [10], + "role_relative_observation": [24], "flatten": [None], "filter_keys": [12], "wrappers": ["test"], @@ -75,9 +80,10 @@ def pytest_generate_tests(metafunc): @pytest.mark.parametrize("step_ratio", [1]) @pytest.mark.parametrize("n_players", [1, 2]) @pytest.mark.parametrize("action_space", [SpaceTypes.DISCRETE, SpaceTypes.MULTI_DISCRETE]) -def test_wrappers_settings(game_id, step_ratio, n_players, action_space, no_attack_buttons_combinations, no_op_max, sticky_actions, - frame_shape, reward_normalization, reward_normalization_factor, - clip_rewards, frame_stack, dilation, actions_stack, scale, +def test_wrappers_settings(game_id, step_ratio, n_players, action_space, no_op_max, sticky_actions, + reward_normalization, reward_normalization_factor, clip_rewards, + no_attack_buttons_combinations, frame_shape, frame_stack, dilation, + add_last_action_to_observation, actions_stack, scale, role_relative_observation, flatten, filter_keys, wrappers, expected, mocker): # Env settings @@ -91,17 +97,19 @@ def test_wrappers_settings(game_id, step_ratio, n_players, action_space, no_atta # Env wrappers settings wrappers_settings = {} - wrappers_settings["no_attack_buttons_combinations"] = no_attack_buttons_combinations wrappers_settings["no_op_max"] = no_op_max wrappers_settings["sticky_actions"] = sticky_actions - wrappers_settings["frame_shape"] = frame_shape wrappers_settings["reward_normalization"] = reward_normalization wrappers_settings["reward_normalization_factor"] = reward_normalization_factor wrappers_settings["clip_rewards"] = clip_rewards + wrappers_settings["no_attack_buttons_combinations"] = no_attack_buttons_combinations + wrappers_settings["frame_shape"] = frame_shape wrappers_settings["frame_stack"] = frame_stack wrappers_settings["dilation"] = dilation - wrappers_settings["actions_stack"] = actions_stack + wrappers_settings["add_last_action_to_observation"] = add_last_action_to_observation + wrappers_settings["actions_stack"] = 1 if add_last_action_to_observation is False and expected == 0 else actions_stack wrappers_settings["scale"] = scale + wrappers_settings["role_relative_observation"] = role_relative_observation wrappers_settings["flatten"] = flatten wrappers_settings["filter_keys"] = filter_keys wrappers_settings["wrappers"] = wrappers