From ef1f465c11189e8ea5920565c12634c8c1762126 Mon Sep 17 00:00:00 2001
From: Alessandro Palmas
Date: Sun, 17 Sep 2023 16:52:11 -0400
Subject: [PATCH] WIP - Acquire engine RAM states refactoring

---
 diambra/arena/arena_gym.py               | 203 +++++++++--------------
 diambra/arena/env_settings.py            |   2 -
 diambra/arena/utils/engine_mock.py       |  62 ++++---
 diambra/arena/utils/integratedGames.json | 196 ++++++++++------------
 diambra/arena/wrappers/arena_wrappers.py |  32 ++--
 diambra/arena/wrappers/observation.py    |  14 --
 tests/env_exec_interface.py              |  29 +---
 tests/man_test_random.py                 |   1 +
 tests/test_wrappers_settings.py          |   7 +-
 9 files changed, 218 insertions(+), 328 deletions(-)

diff --git a/diambra/arena/arena_gym.py b/diambra/arena/arena_gym.py
index 637326e1..1abc93bc 100644
--- a/diambra/arena/arena_gym.py
+++ b/diambra/arena/arena_gym.py
@@ -14,7 +14,6 @@ class DiambraGymBase(gym.Env):
     """Diambra Environment gymnasium base interface"""
     metadata = {"render_modes": ["human", "rgb_array"]}
     _frame = None
-    _last_action = None
     reward_normalization_value = 1.0
     render_gui_started = False

@@ -55,11 +54,44 @@ def __init__(self, env_settings: Union[EnvironmentSettings1P, EnvironmentSetting
         self.print_actions_dict = [move_dict, attack_dict]

         # Maximum difference in players health
-        for k in sorted(self.env_info.ram_states.keys()):
-            if "health" in k:
-                self.max_delta_health = self.env_info.ram_states[k].max - self.env_info.ram_states[k].min
+        category_key_enum = model.RamStatesCategories.Value("P1")
+        for k in sorted(self.env_info.ram_states_categories[category_key_enum].ram_states.keys()):
+            key_enum_name = model.RamStates.Name(k)
+            if "health" in key_enum_name:
+                self.max_delta_health = self.env_info.ram_states_categories[category_key_enum].ram_states[k].max - \
+                                        self.env_info.ram_states_categories[category_key_enum].ram_states[k].min
                 break

+        # Observation space
+        # Dictionary
+        observation_space_dict = {}
+        observation_space_dict['frame'] = gym.spaces.Box(low=0, high=255, shape=(self.env_info.frame_shape.h,
+                                                                                 self.env_info.frame_shape.w,
+                                                                                 self.env_info.frame_shape.c),
+                                                         dtype=np.uint8)
+
+        # Adding RAM states observations
+        for k, v in self.env_info.ram_states_categories.items():
+            print("Processing {}, {}".format(model.RamStatesCategories.Name(k), v))
+            if k == model.RamStatesCategories.common:
+                target_dict = observation_space_dict
+            else:
+                observation_space_dict[model.RamStatesCategories.Name(k)] = {}
+                target_dict = observation_space_dict[model.RamStatesCategories.Name(k)]
+
+            for k2, v2 in v.ram_states.items():
+                if v2.type == SpaceType.BINARY or v2.type == SpaceType.DISCRETE:
+                    target_dict[model.RamStates.Name(k2)] = gym.spaces.Discrete(v2.max + 1)
+                elif v2.type == SpaceType.BOX:
+                    target_dict[model.RamStates.Name(k2)] = gym.spaces.Box(low=v2.min, high=v2.max, shape=(1,), dtype=np.int16)
+                else:
+                    raise RuntimeError("Only Discrete (Binary/Categorical) | Box Spaces allowed")
+
+        for space_key in [model.RamStatesCategories.P1, model.RamStatesCategories.P2]:
+            observation_space_dict[model.RamStatesCategories.Name(space_key)] = gym.spaces.Dict(observation_space_dict[model.RamStatesCategories.Name(space_key)])
+
+        self.observation_space = gym.spaces.Dict(observation_space_dict)
+
     # Return env action list
     def get_actions_tuples(self):
         return self.actions_tuples
@@ -81,7 +113,6 @@ def get_cumulative_reward_bounds(self):

     # Reset the environment
     def reset(self, seed: int = None, options: Dict[str, Any] = None):
-        self._last_action = [[0, 0], [0, 0]]
         if options is None:
             options = {}
         options["seed"] = seed
@@ -109,21 +140,13 @@ def render(self, wait_key=1):
         return self._frame

     # Print observation details to the console
-    def show_obs(self, observation, wait_key=1, viz=True, string="observation", key=None):
+    def show_obs(self, observation, wait_key=1, viz=True, string="observation", key=None, outermost=True):
         if type(observation) == dict:
             for k, v in sorted(observation.items()):
-                self.show_obs(v, wait_key=wait_key, viz=viz, string=string + "[\"{}\"]".format(k), key=k)
+                self.show_obs(v, wait_key=wait_key, viz=viz, string=string + "[\"{}\"]".format(k), key=k, outermost=False)
         else:
             if key != "frame":
-                if "action" in key:
-                    out_value = observation
-                    additional_string = ": "
-                    if isinstance(observation, (int, np.integer)) is False:
-                        n_actions_stack = int(observation.size / (self.n_actions[0] if "move" in key else self.n_actions[1]))
-                        out_value = np.reshape(observation, [n_actions_stack, -1])
-                        additional_string = " (reshaped for visualization):\n"
-                    print(string + "{}{}".format(additional_string, out_value))
-                elif "own_char" in key or "opp_char" in key:
+                if key.startswith("character"):
                     char_idx = observation if type(observation) == int else np.where(observation == 1)[0][0]
                     print(string + ": {} / {}".format(observation, self.env_info.characters_info.char_list[char_idx]))
                 else:
@@ -136,44 +159,21 @@ def show_obs(self, observation, wait_key=1, viz=True, string="observation", key=
                     norm_factor = 255 if np.amax(observation) > 1.0 else 1.0
                     for idx in range(observation.shape[2]):
                         cv2.imshow("[{}] Frame channel {}".format(os.getpid(), idx), observation[:, :, idx] / norm_factor)
-
-                cv2.waitKey(wait_key)
             except:
                 pass

+        if outermost is True and viz is True and (sys.platform.startswith('linux') is False or 'DISPLAY' in os.environ):
+            try:
+                cv2.waitKey(wait_key)
+            except:
+                pass
+
     # Closing the environment
     def close(self):
         # Close DIAMBRA Arena
        cv2.destroyAllWindows()
         self.arena_engine.close()

-    def _get_ram_states_obs_dict(self):
-        player_spec_dict = {}
-        generic_dict = {}
-        # Adding env additional observations (side-specific)
-        for k, v in self.env_info.ram_states.items():
-            if k.endswith("P1"):
-                target_dict = player_spec_dict
-                knew = "own_" + k[:-3]
-            elif k.endswith("P2"):
-                target_dict = player_spec_dict
-                knew = "opp_" + k[:-3]
-            else:
-                target_dict = generic_dict
-                knew = k
-
-            if v.type == SpaceType.BINARY or v.type == SpaceType.DISCRETE:
-                target_dict[knew] = gym.spaces.Discrete(v.max + 1)
-            elif v.type == SpaceType.BOX:
-                target_dict[knew] = gym.spaces.Box(low=v.min, high=v.max, shape=(1,), dtype=np.int32)
-            else:
-                raise RuntimeError("Only Discrete (Binary/Categorical) | Box Spaces allowed")
-
-        player_spec_dict["action_move"] = gym.spaces.Discrete(self.n_actions[0])
-        player_spec_dict["action_attack"] = gym.spaces.Discrete(self.n_actions[1])
-
-        return generic_dict, player_spec_dict
-
     # Get frame
     def _get_frame(self, response):
         self._frame = np.frombuffer(response.observation.frame, dtype='uint8').reshape(self.env_info.frame_shape.h, \
@@ -183,56 +183,38 @@ def _get_frame(self, response):

     # Get info
     def _get_info(self, response):
-        info = dict(response.info.game_states)
+        info = {model.GameStates.Name(k): v for k, v in response.info.game_states.items()}
         info["settings"] = self.env_settings.pb_model
         return info

-    # Integrate player specific RAM states into observation
-    def _player_specific_ram_states_integration(self, response, idx):
-        player_spec_dict = {}
-        generic_dict = {}
-
-        # Adding env additional observations (side-specific)
-        player_role = self.env_settings.pb_model.episode_settings.player_settings[idx].role
-        for k, v in self.env_info.ram_states.items():
-            if (k.endswith("P1") or k.endswith("P2")):
-                target_dict = player_spec_dict
-                if k[-2:] == player_role:
-                    knew = "own_" + k[:-3]
-                else:
-                    knew = "opp_" + k[:-3]
+    def _get_obs(self, response):
+        observation = {}
+        observation["frame"] = self._get_frame(response)
+
+        # Adding RAM states observations
+        for k, v in self.env_info.ram_states_categories.items():
+            if k == model.RamStatesCategories.common:
+                target_dict = observation
             else:
-                target_dict = generic_dict
-                knew = k
+                observation[model.RamStatesCategories.Name(k)] = {}
+                target_dict = observation[model.RamStatesCategories.Name(k)]

-            # Box spaces
-            if v.type == SpaceType.BOX:
-                target_dict[knew] = np.array([response.observation.ram_states[k]], dtype=np.int32)
-            else:  # Discrete spaces (binary / categorical)
-                target_dict[knew] = response.observation.ram_states[k]
+            category_ram_states = response.observation.ram_states_categories[k]

-        player_spec_dict["action_move"] = self._last_action[idx][0]
-        player_spec_dict["action_attack"] = self._last_action[idx][1]
+            for k2, v2 in v.ram_states.items():
+                # Box spaces
+                if v2.type == SpaceType.BOX:
+                    target_dict[model.RamStates.Name(k2)] = np.array([category_ram_states.ram_states[k2]])
+                else:  # Discrete spaces (binary / categorical)
+                    target_dict[model.RamStates.Name(k2)] = category_ram_states.ram_states[k2]

-        return generic_dict, player_spec_dict
+        return observation

 class DiambraGym1P(DiambraGymBase):
     """Diambra Environment gymnasium single agent interface"""
     def __init__(self, env_settings):
         super().__init__(env_settings)

-        # Observation space
-        # Dictionary
-        observation_space_dict = {}
-        observation_space_dict['frame'] = gym.spaces.Box(low=0, high=255, shape=(self.env_info.frame_shape.h,
-                                                                                 self.env_info.frame_shape.w,
-                                                                                 self.env_info.frame_shape.c),
-                                                         dtype=np.uint8)
-        generic_obs_dict, player_obs_dict = self._get_ram_states_obs_dict()
-        observation_space_dict.update(generic_obs_dict)
-        observation_space_dict.update(player_obs_dict)
-        self.observation_space = gym.spaces.Dict(observation_space_dict)
-
         # Action space
         # MultiDiscrete actions:
         # - Arrows -> One discrete set
@@ -256,44 +238,17 @@ def get_no_op_action(self):

     # Step the environment
     def step(self, action: Union[int, List[int]]):
         # Defining move and attack actions P1/P2 as a function of action_space
-        if isinstance(self.action_space, gym.spaces.MultiDiscrete):
-            self._last_action[0] = action
-        else:
-            self._last_action[0] = list(discrete_to_multi_discrete_action(action, self.n_actions[0]))
-        response = self.arena_engine.step(self._last_action)
-        observation = self._get_obs(response)
+        if isinstance(self.action_space, gym.spaces.Discrete):
+            action = list(discrete_to_multi_discrete_action(action, self.n_actions[0]))
+        response = self.arena_engine.step([action])

-        return observation, response.reward, response.info.game_states["episode_done"], False, self._get_info(response)
-
-    def _get_obs(self, response):
-        observation = {}
-        observation["frame"] = self._get_frame(response)
-        generic_obs_dict, player_obs_dict = self._player_specific_ram_states_integration(response, 0)
-        observation.update(generic_obs_dict)
-        observation.update(player_obs_dict)
-
-        return observation
+        return self._get_obs(response), response.reward, response.info.game_states[model.GameStates.episode_done], False, self._get_info(response)

 class DiambraGym2P(DiambraGymBase):
     """Diambra Environment gymnasium multi-agent interface"""
     def __init__(self, env_settings):
         super().__init__(env_settings)

-        # Dictionary observation space
-        observation_space_dict = {}
-        observation_space_dict['frame'] = gym.spaces.Box(low=0, high=255,
-                                                         shape=(self.env_info.frame_shape.h,
-                                                                self.env_info.frame_shape.w,
-                                                                self.env_info.frame_shape.c),
-                                                         dtype=np.uint8)
-
-        generic_obs_dict, player_obs_dict = self._get_ram_states_obs_dict()
-        observation_space_dict.update(generic_obs_dict)
-        observation_space_dict["agent_0"] = gym.spaces.Dict(player_obs_dict)
-        observation_space_dict["agent_1"] = gym.spaces.Dict(player_obs_dict)
-
-        self.observation_space = gym.spaces.Dict(observation_space_dict)
-
         # Action space
         # Dictionary
         action_spaces_values = {SpaceType.MULTI_DISCRETE: gym.spaces.MultiDiscrete(self.n_actions),
@@ -312,30 +267,20 @@ def get_no_op_action(self):

     def step(self, actions: Dict[str, Union[int, List[int]]]):
         # NOTE: the assumption in current interface is that we have actions sorted as agent's order
         actions = sorted(actions.items())
+        action_list = [[],[]]
         for idx, action in enumerate(actions):
             # Defining move and attack actions P1/P2 as a function of action_space
             if isinstance(self.action_space[action[0]], gym.spaces.MultiDiscrete):
-                self._last_action[idx] = action[1]
+                action_list[idx] = action[1]
             else:
-                self._last_action[idx] = list(discrete_to_multi_discrete_action(action[1], self.n_actions[0]))
-        response = self.arena_engine.step(self._last_action)
-        observation = self._get_obs(response)
+                action_list[idx] = list(discrete_to_multi_discrete_action(action[1], self.n_actions[0]))
+        response = self.arena_engine.step(action_list)

-        return observation, response.reward, response.info.game_states["game_done"], False, self._get_info(response)
+        return self._get_obs(response), response.reward, response.info.game_states[model.GameStates.game_done], False, self._get_info(response)

     def _map_action_spaces_to_agents(self, values_dict):
         out_dict = {}
         for idx, action_space in enumerate(self.env_settings.action_space):
             out_dict["agent_{}".format(idx)] = values_dict[action_space]
-        return out_dict
-
-    def _get_obs(self, response):
-        observation = {}
-        observation["frame"] = self._get_frame(response)
-        for idx in range(self.env_settings.n_players):
-            generic_obs_dict, player_obs_dict = self._player_specific_ram_states_integration(response, idx)
-            observation["agent_{}".format(idx)] = player_obs_dict
-            observation.update(generic_obs_dict)
-
-        return observation
\ No newline at end of file
+        return out_dict
\ No newline at end of file
diff --git a/diambra/arena/env_settings.py b/diambra/arena/env_settings.py
index 8868704d..abd05478 100644
--- a/diambra/arena/env_settings.py
+++ b/diambra/arena/env_settings.py
@@ -372,7 +372,6 @@ class WrappersSettings:
     scale: bool = False
     exclude_image_scaling: bool = False
     process_discrete_binary: bool = False
-    scale_mod: int = 0
     frame_shape: Tuple[int, int, int] = (0, 0, 0)
     flatten: bool = False
     filter_keys: List[str] = None
@@ -384,7 +383,6 @@ def sanity_check(self):
         check_num_in_range("frame_stack", self.frame_stack, [1, MAX_STACK_VALUE])
         check_num_in_range("dilation", self.dilation, [1, MAX_STACK_VALUE])
         check_num_in_range("actions_stack", self.actions_stack, [1, MAX_STACK_VALUE])
-        check_num_in_range("scale_mod", self.scale_mod, [0, 0])
         check_num_in_range("reward_normalization_factor", self.reward_normalization_factor, [0.0, 1000000])
         check_val_in_list("frame_shape[2]", self.frame_shape[2], [0, 1, 3])

diff --git a/diambra/arena/utils/engine_mock.py b/diambra/arena/utils/engine_mock.py
index 8263e1ab..07def298 100644
--- a/diambra/arena/utils/engine_mock.py
+++ b/diambra/arena/utils/engine_mock.py
@@ -2,6 +2,7 @@
 import random
 import numpy as np
 import diambra.arena
+from copy import deepcopy
 from diambra.engine import model

 class DiambraEngineMock:
@@ -69,12 +70,16 @@ def mock_env_init(self, env_settings_pb):
         self.delta_health = self.game_data["health"][1] - self.game_data["health"][0]
         self.base_hit = int(self.delta_health * self.game_data["n_actions"][1] /
                             ((self.game_data["n_actions"][0] + self.game_data["n_actions"][1]) *
-                             (self.game_data["ram_states"]["timer"][2] / self.settings.step_ratio)))
+                             (self.game_data["ram_states"]["common"]["timer"][2] / self.settings.step_ratio)))

         # Generate the ram states map
-        self.ram_states = self.game_data["ram_states"]
+        self.ram_states = {}
+        self.ram_states["common"] = self.game_data["ram_states"]["common"]
+        self.ram_states["P1"] = deepcopy(self.game_data["ram_states"]["Px"])
+        self.ram_states["P2"] = deepcopy(self.game_data["ram_states"]["Px"])
         for k, v in self.ram_states.items():
-            self.ram_states[k].append(0)
+            for k2, v2 in v.items():
+                self.ram_states[k][k2].append(0)

         # Build the response
         response = model.EnvInitResponse()
@@ -128,9 +133,12 @@ def mock_env_init(self, env_settings_pb):
         # RAM states
         self._generate_ram_states()
         for k, v in self.ram_states.items():
-            response.ram_states[k].type = model.SpaceType.Value(v[0])
-            response.ram_states[k].min = v[1]
-            response.ram_states[k].max = v[2]
+            k_enum = model.RamStatesCategories.Value(k)
+            for k2, v2 in v.items():
+                k2_enum = model.RamStates.Value(k2)
+                response.ram_states_categories[k_enum].ram_states[k2_enum].type = model.SpaceType.Value(v2[0])
+                response.ram_states_categories[k_enum].ram_states[k2_enum].min = v2[1]
+                response.ram_states_categories[k_enum].ram_states[k2_enum].max = v2[2]

         return response

@@ -149,7 +157,6 @@ def mock_reset(self, episode_settings):

     # Step the environment [pb low level]
     def mock_step(self, actions):
-
         # Update class state
         self._new_game_state(actions)

@@ -160,27 +167,27 @@ def mock_close(self):
         pass

     def _generate_ram_states(self):
-
         for k, v in self.ram_states.items():
-            self.ram_states[k][3] = random.choice(range(v[1], v[2] + 1))
+            for k2, v2 in v.items():
+                self.ram_states[k][k2][3] = random.choice(range(v2[1], v2[2] + 1))

         # Setting meaningful values to ram states
-        self.ram_states["stage"][3] = self.current_stage_number
-        self.ram_states["side_P1"][3] = self.side["P1"]
-        self.ram_states["side_P2"][3] = self.side["P2"]
-        self.ram_states["wins_P1"][3] = self.n_rounds_won
-        self.ram_states["wins_P2"][3] = self.n_rounds_lost
+        self.ram_states["common"]["stage"][3] = self.current_stage_number
+        self.ram_states["P1"]["side"][3] = self.side["P1"]
+        self.ram_states["P2"]["side"][3] = self.side["P2"]
+        self.ram_states["P1"]["wins"][3] = self.n_rounds_won
+        self.ram_states["P2"]["wins"][3] = self.n_rounds_lost

         values = [self.char, self.health]
-        for idx, state in enumerate(["char", "health"]):
+        for idx, state in enumerate(["character", "health"]):
             for text in ["", "_1", "_2", "_3"]:
                 for player in ["P1", "P2"]:
-                    key = "{}{}_{}".format(state, text, player)
-                    if (key in self.ram_states):
-                        self.ram_states[key][3] = values[idx][player]
+                    key = "{}{}".format(state, text)
+                    if (key in self.ram_states[player]):
+                        self.ram_states[player][key][3] = values[idx][player]

-        self.ram_states["timer"][3] = int(self.timer)
+        self.ram_states["common"]["timer"][3] = int(self.timer)

     def _generate_frame(self):
self.game_data["rounds_per_stage"] + int(self.timer)) % 255) @@ -217,7 +224,7 @@ def _reset_state(self): self.side["P2"] = 1 if self.settings.episode_settings.player_settings[0].role == "P1" else 0 self.health["P1"] = self.game_data["health"][1] self.health["P2"] = self.game_data["health"][1] - self.timer = self.game_data["ram_states"]["timer"][2] + self.timer = self.game_data["ram_states"]["common"]["timer"][2] self.reward = 0 @@ -326,7 +333,7 @@ def _new_game_state(self, actions): self.side["P2"] = 1 self.health["P1"] = self.game_data["health"][1] self.health["P2"] = self.game_data["health"][1] - self.timer = self.game_data["ram_states"]["timer"][2] + self.timer = self.game_data["ram_states"]["common"]["timer"][2] # Set perfect chance self._set_perfect_chance() @@ -342,14 +349,15 @@ def _update_step_reset_response(self): # Ram states self._generate_ram_states() for k, v in self.ram_states.items(): - response.observation.ram_states[k] = v[3] + for k2, v2 in v.items(): + response.observation.ram_states_categories[model.RamStatesCategories.Value(k)].ram_states[model.RamStates.Value(k2)] = v2[3] # Game state - response.info.game_states["round_done"] = self.round_done_ - response.info.game_states["stage_done"] = self.stage_done_ - response.info.game_states["game_done"] = self.game_done_ - response.info.game_states["episode_done"] = self.episode_done_ - response.info.game_states["env_done"] = self.env_done_ + response.info.game_states[model.GameStates.round_done] = self.round_done_ + response.info.game_states[model.GameStates.stage_done] = self.stage_done_ + response.info.game_states[model.GameStates.game_done] = self.game_done_ + response.info.game_states[model.GameStates.episode_done] = self.episode_done_ + response.info.game_states[model.GameStates.env_done] = self.env_done_ # Frame response.observation.frame = self._generate_frame() diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json index a176d3cf..689e1d6a 100644 --- a/diambra/arena/utils/integratedGames.json +++ b/diambra/arena/utils/integratedGames.json @@ -34,16 +34,16 @@ "health": [0, 208], "frame_shape": [480, 512, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "wins_P1": ["BOX", 0, 2], - "wins_P2": ["BOX", 0, 2], - "stage": ["BOX", 1, 8], - "char_P1": ["DISCRETE", 0, 10], - "char_P2": ["DISCRETE", 0, 10], - "health_P1": ["BOX", 0, 208], - "health_P2": ["BOX", 0, 208], - "timer": ["BOX", 0, 40] + "common": { + "stage": ["BOX", 1, 8], + "timer": ["BOX", 0, 40] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 2], + "character": ["DISCRETE", 0, 10], + "health": ["BOX", 0, 208] + } }, "cfg": {"H": "But6", "P": "But1", "K": "But2"} }, @@ -86,28 +86,22 @@ "health": [-1, 160], "frame_shape": [224, 384, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "wins_P1": ["BOX", 0, 2], - "wins_P2": ["BOX", 0, 2], - "stage": ["BOX", 1, 10], - "char_P1": ["DISCRETE", 0, 19], - "char_P2": ["DISCRETE", 0, 19], - "stun_bar_P1": ["BOX", 0, 72], - "stun_bar_P2": ["BOX", 0, 72], - "stunned_P1": ["BINARY", 0, 1], - "stunned_P2": ["BINARY", 0, 1], - "super_bar_P1": ["BOX", 0, 128], - "super_bar_P2": ["BOX", 0, 128], - "super_type_P1": ["DISCRETE", 0, 2], - "super_type_P2": ["DISCRETE", 0, 2], - "super_count_P1": ["BOX", 0, 3], - "super_count_P2": ["BOX", 0, 3], - "super_max_count_P1": ["BOX", 1, 3], - "super_max_count_P2": ["BOX", 1, 3], - "health_P1": ["BOX", -1, 160], - "health_P2": ["BOX", -1, 160], - "timer": ["BOX", 0, 99] + 
"common": { + "stage": ["BOX", 1, 10], + "timer": ["BOX", 0, 99] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 2], + "character": ["DISCRETE", 0, 19], + "stun_bar": ["BOX", 0, 72], + "stunned": ["BINARY", 0, 1], + "super_bar": ["BOX", 0, 128], + "super_type": ["DISCRETE", 0, 2], + "super_count": ["BOX", 0, 3], + "super_max_count": ["BOX", 1, 3], + "health": ["BOX", -1, 160] + } }, "cfg": {"LP": "But4", "MP": "But1", "HP": "But5", "LK": "But3", "MK": "But2", "HK": "But6"} }, @@ -166,26 +160,21 @@ "health": [0, 182], "frame_shape": [240, 512, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "wins_P1": ["BOX", 0, 2], - "wins_P2": ["BOX", 0, 2], - "stage": ["BOX", 1, 8], - "char_P1": ["DISCRETE", 0, 38], - "char_P2": ["DISCRETE", 0, 38], - "char_1_P1": ["DISCRETE", 0, 38], - "char_1_P2": ["DISCRETE", 0, 38], - "char_2_P1": ["DISCRETE", 0, 38], - "char_2_P2": ["DISCRETE", 0, 38], - "health_1_P1": ["BOX", 0, 182], - "health_1_P2": ["BOX", 0, 182], - "health_2_P1": ["BOX", 0, 182], - "health_2_P2": ["BOX", 0, 182], - "active_char_P1": ["BINARY", 0, 1], - "active_char_P2": ["BINARY", 0, 1], - "bar_status_P1": ["DISCRETE", 0, 4], - "bar_status_P2": ["DISCRETE", 0, 4], - "timer": ["BOX", 0, 60] + "common": { + "stage": ["BOX", 1, 8], + "timer": ["BOX", 0, 60] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 2], + "character": ["DISCRETE", 0, 38], + "character_1": ["DISCRETE", 0, 38], + "character_2": ["DISCRETE", 0, 38], + "health_1": ["BOX", 0, 227], + "health_2": ["BOX", 0, 227], + "active_character": ["BINARY", 0, 1], + "bar_status": ["DISCRETE", 0, 4] + } }, "cfg": {"LP": "But4", "RP": "But1", "LK": "But3", "RK": "But2", "TAG": "But6"} }, @@ -227,18 +216,18 @@ "health": [0, 166], "frame_shape": [254, 500, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "wins_P1": ["BOX", 0, 2], - "wins_P2": ["BOX", 0, 2], - "stage": ["BOX", 1, 11], - "char_P1": ["DISCRETE", 0, 25], - "char_P2": ["DISCRETE", 0, 25], - "aggressor_bar_P1": ["BOX", 0, 48], - "aggressor_bar_P2": ["BOX", 0, 48], - "health_P1": ["BOX", 0, 166], - "health_P2": ["BOX", 0, 166], - "timer": ["BOX", 0, 100] + "common": { + "stage": ["BOX", 1, 11], + "timer": ["BOX", 0, 100] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 2], + "character": ["DISCRETE", 0, 25], + "aggressor_bar": ["BOX", 0, 48], + "health": ["BOX", 0, 166] + } + }, "cfg": {"HP": "But1", "HK": "But2", "LK": "But3", "LP": "But4", "RUN": "But5", "BLK": "But6"} }, @@ -283,30 +272,23 @@ "health": [0, 125], "frame_shape": [224, 320, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "wins_P1": ["BOX", 0, 3], - "wins_P2": ["BOX", 0, 3], - "stage": ["BOX", 1, 7], - "char_P1": ["DISCRETE", 0, 27], - "char_P2": ["DISCRETE", 0, 27], - "rage_on_P1": ["BINARY", 0, 1], - "rage_on_P2": ["BINARY", 0, 1], - "weapon_lost_P1": ["BINARY", 0, 1], - "weapon_lost_P2": ["BINARY", 0, 1], - "weapon_fight_P1": ["BINARY", 0, 1], - "weapon_fight_P2": ["BINARY", 0, 1], - "rage_used_P1": ["BINARY", 0, 1], - "rage_used_P2": ["BINARY", 0, 1], - "rage_bar_P1": ["BOX", 0, 164096], - "rage_bar_P2": ["BOX", 0, 164096], - "weapon_bar_P1": ["BOX", 0, 120], - "weapon_bar_P2": ["BOX", 0, 120], - "power_bar_P1": ["BOX", 0, 64], - "power_bar_P2": ["BOX", 0, 64], - "health_P1": ["BOX", 0, 125], - "health_P2": ["BOX", 0, 125], - "timer": ["BOX", 0, 60] + "common": { + "stage": ["BOX", 1, 7], + "timer": ["BOX", 0, 60] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": 
["BOX", 0, 3], + "character": ["DISCRETE", 0, 27], + "rage_on": ["BINARY", 0, 1], + "weapon_lost": ["BINARY", 0, 1], + "weapon_fight": ["BINARY", 0, 1], + "rage_used": ["BINARY", 0, 1], + "rage_bar": ["BOX", 0, 100], + "weapon_bar": ["BOX", 0, 120], + "power_bar": ["BOX", 0, 64], + "health": ["BOX", 0, 125] + } }, "cfg": {"WS": "But1", "MS": "But2", "K": "But3", "M": "But4"} }, @@ -351,28 +333,22 @@ "health": [-1, 119], "frame_shape": [240, 320, 3], "ram_states": { - "side_P1": ["BINARY", 0, 1], - "side_P2": ["BINARY", 0, 1], - "stage": ["BOX", 1, 7], - "char_P1": ["DISCRETE", 0, 44], - "char_P2": ["DISCRETE", 0, 44], - "char_1_P1": ["DISCRETE", 0, 44], - "char_1_P2": ["DISCRETE", 0, 44], - "char_2_P1": ["DISCRETE", 0, 44], - "char_2_P2": ["DISCRETE", 0, 44], - "char_3_P1": ["DISCRETE", 0, 44], - "char_3_P2": ["DISCRETE", 0, 44], - "health_P1": ["BOX", -1, 119], - "health_P2": ["BOX", -1, 119], - "power_bar_P1": ["BOX", 0, 100], - "power_bar_P2": ["BOX", 0, 100], - "special_attacks_P1": ["BOX", 0, 5], - "special_attacks_P2": ["BOX", 0, 5], - "wins_P1": ["BOX", 0, 3], - "wins_P2": ["BOX", 0, 3], - "bar_type_P1": ["DISCRETE", 0, 7], - "bar_type_P2": ["DISCRETE", 0, 7], - "timer": ["BOX", 0, 60] + "common": { + "stage": ["BOX", 1, 7], + "timer": ["BOX", 0, 60] + }, + "Px": { + "side": ["BINARY", 0, 1], + "character": ["DISCRETE", 0, 44], + "character_1": ["DISCRETE", 0, 44], + "character_2": ["DISCRETE", 0, 44], + "character_3": ["DISCRETE", 0, 44], + "health": ["BOX", -1, 119], + "power_bar": ["BOX", 0, 100], + "special_attacks": ["BOX", 0, 5], + "wins": ["BOX", 0, 3], + "bar_type": ["DISCRETE", 0, 7] + } }, "cfg": {"WP": "But1", "WK": "But2", "SP": "But3", "SK": "But4"} } diff --git a/diambra/arena/wrappers/arena_wrappers.py b/diambra/arena/wrappers/arena_wrappers.py index a8e37e86..59fb8799 100644 --- a/diambra/arena/wrappers/arena_wrappers.py +++ b/diambra/arena/wrappers/arena_wrappers.py @@ -4,7 +4,7 @@ import logging from diambra.arena.env_settings import WrappersSettings from diambra.arena.wrappers.observation import WarpFrame, GrayscaleFrame, FrameStack, ActionsStack, \ - ScaledFloatObsNeg, ScaledFloatObs, FlattenFilterDictObs + ScaledFloatObs, FlattenFilterDictObs # Remove attack buttons combinations class NoAttackButtonsCombinations(gym.Wrapper): @@ -69,7 +69,7 @@ def __init__(self, env, sticky_actions): gym.Wrapper.__init__(self, env) self.sticky_actions = sticky_actions assert self.unwrapped.env_settings.step_ratio == 1, "sticky_actions can be activated only "\ - "when step_ratio is set equal to 1" + "when step_ratio is set equal to 1" def step(self, action): rew = 0.0 @@ -134,6 +134,14 @@ def env_wrapping(env, wrappers_settings: WrappersSettings): if wrappers_settings.sticky_actions > 1: env = StickyActionsEnv(env, sticky_actions=wrappers_settings.sticky_actions) + # Normalize rewards + if wrappers_settings.reward_normalization is True: + env = NormalizeRewardEnv(env, wrappers_settings.reward_normalization_factor) + + # Clip rewards using sign function + if wrappers_settings.clip_rewards is True: + env = ClipRewardEnv(env) + if wrappers_settings.frame_shape[2] == 1: if env.observation_space["frame"].shape[2] == 1: env.logger.warning("Warning: skipping grayscaling as the frame is already single channel.") @@ -156,14 +164,6 @@ def env_wrapping(env, wrappers_settings: WrappersSettings): env = WarpFrame(env, wrappers_settings.frame_shape[:2]) - # Normalize rewards - if wrappers_settings.reward_normalization is True: - env = NormalizeRewardEnv(env, 
-        env = NormalizeRewardEnv(env, wrappers_settings.reward_normalization_factor)
-
-    # Clip rewards using sign function
-    if wrappers_settings.clip_rewards is True:
-        env = ClipRewardEnv(env)
-
     # Stack #frameStack frames together
     if wrappers_settings.frame_stack > 1:
         env = FrameStack(env, wrappers_settings.frame_stack, wrappers_settings.dilation)

@@ -172,17 +172,9 @@ def env_wrapping(env, wrappers_settings: WrappersSettings):
     if wrappers_settings.actions_stack > 1:
         env = ActionsStack(env, wrappers_settings.actions_stack)

-    # Scales observations normalizing them
+    # Scales observations normalizing them between 0.0 and 1.0
     if wrappers_settings.scale is True:
-        if wrappers_settings.scale_mod == 0:
-            # Between 0.0 and 1.0
-            env = ScaledFloatObs(env, wrappers_settings.exclude_image_scaling, wrappers_settings.process_discrete_binary)
-        elif wrappers_settings.scale_mod == -1:
-            # Between -1.0 and 1.0
-            raise RuntimeError("Scaling between -1.0 and 1.0 currently not implemented")
-            env = ScaledFloatObsNeg(env)
-        else:
-            raise ValueError("Scale mod must be either 0 or -1")
+        env = ScaledFloatObs(env, wrappers_settings.exclude_image_scaling, wrappers_settings.process_discrete_binary)

     if wrappers_settings.flatten is True:
         env = FlattenFilterDictObs(env, wrappers_settings.filter_keys)
diff --git a/diambra/arena/wrappers/observation.py b/diambra/arena/wrappers/observation.py
index 97e726dd..86da9dc3 100644
--- a/diambra/arena/wrappers/observation.py
+++ b/diambra/arena/wrappers/observation.py
@@ -159,20 +159,6 @@ def step(self, action):

         return self._process_obs(obs), reward, terminated, truncated, info

-class ScaledFloatObsNeg(gym.ObservationWrapper):
-    def __init__(self, env):
-        gym.ObservationWrapper.__init__(self, env)
-        self.observation_space.spaces["frame"] = gym.spaces.Box(low=-1.0, high=1.0,
-                                                                shape=self.observation_space["frame"].shape,
-                                                                dtype=np.float32)
-
-    def observation(self, observation):
-        # careful! This undoes the memory optimization, use
-        # with smaller replay buffers only.
- observation["frame"] = observation["frame"] / 127.5 - 1.0 - return observation - - class ScaledFloatObs(gym.ObservationWrapper): def __init__(self, env, exclude_image_scaling=False, process_discrete_binary=False): gym.ObservationWrapper.__init__(self, env) diff --git a/tests/env_exec_interface.py b/tests/env_exec_interface.py index 51ea7bfe..7c315841 100755 --- a/tests/env_exec_interface.py +++ b/tests/env_exec_interface.py @@ -43,7 +43,6 @@ def env_exec(settings, options_list, wrappers_settings, episode_recording_settin no_action = random.choices([True, False], [args["no_action_probability"], 1.0 - args["no_action_probability"]])[0] while curr_num_ep < max_num_ep: - actions = env.action_space.sample() if env.env_settings.n_players == 1: @@ -97,27 +96,16 @@ def env_exec(settings, options_list, wrappers_settings, episode_recording_settin cumulative_ep_rew_all.append(cumulative_ep_rew) cumulative_ep_rew = 0.0 - if np.any([info["round_done"], info["stage_done"], info["game_done"], info["episode_done"]]): - # Side check - if env.env_settings.n_players == 1: - ram_state_values = [observation["own_side"], observation["opp_side"]] - else: - if "agent_0_own_side" in observation.keys(): - ram_state_values = [observation["agent_0_own_side"], observation["agent_0_opp_side"]] - else: - ram_state_values = [observation["agent_0"]["own_side"], observation["agent_0"]["opp_side"]] - - if env.env_settings.pb_model.episode_settings.player_settings[0].role == "P2": - if (ram_state_values[0] != 1.0 or ram_state_values[1] != 0.0): - raise RuntimeError("Wrong starting sides:", ram_state_values[0], ram_state_values[1]) - else: - if (ram_state_values[0] != 0.0 or ram_state_values[1] != 1.0): - raise RuntimeError("Wrong starting sides:", ram_state_values[0], ram_state_values[1]) + if info["round_done"]: + # Side check when no wrappers active: + if len(wrappers_settings) == 0: + if (observation["P1"]["side"] != 0.0 or observation["P2"]["side"] != 1.0): + raise RuntimeError("Wrong starting sides:", observation["P1"]["side"], observation["P2"]["side"]) - frame = observation["frame"] + elif ("frame_shape" in wrappers_settings.keys() and wrappers_settings["frame_shape"][2] == 1): + # Frames equality check + frame = observation["frame"] - # Frames equality check - if ("frame_shape" in wrappers_settings.keys() and wrappers_settings["frame_shape"][2] == 1): for frame_idx in range(frame.shape[2] - 1): if np.any(frame[:, :, frame_idx] != frame[:, :, frame_idx + 1]): raise RuntimeError("Frames inside observation after round/stage/game/episode done are " @@ -142,7 +130,6 @@ def env_exec(settings, options_list, wrappers_settings, episode_recording_settin round_max_reward = env.max_delta_health / env.reward_normalization_value if (no_action is True and (np.mean(cumulative_ep_rew_all) > -(max_continue + 1) * round_max_reward * n_rounds + 0.001)): - message = "NoAction policy and average reward different than {} ({})".format( -(max_continue + 1) * round_max_reward * n_rounds, np.mean(cumulative_ep_rew_all)) warnings.warn(UserWarning(message)) diff --git a/tests/man_test_random.py b/tests/man_test_random.py index 0df603b7..ebe5a0fb 100644 --- a/tests/man_test_random.py +++ b/tests/man_test_random.py @@ -43,6 +43,7 @@ settings["difficulty"] = opt.difficulty if opt.difficulty != 0 else None settings["characters"] = ((opt.character0, opt.character0_2, opt.character0_3), (opt.character1, opt.character1_2, opt.character1_3)) + settings["characters"] = tuple([None if "Random" in settings["characters"][idx] else settings["characters"] 
 settings["step_ratio"] = opt.stepRatio
 settings["continue_game"] = opt.continueGame
 settings["action_space"] = (diambra.arena.SpaceType.DISCRETE, diambra.arena.SpaceType.DISCRETE) if opt.actionSpace == "discrete" else \
diff --git a/tests/test_wrappers_settings.py b/tests/test_wrappers_settings.py
index 0786cd2e..f76ff388 100644
--- a/tests/test_wrappers_settings.py
+++ b/tests/test_wrappers_settings.py
@@ -27,7 +27,7 @@ def func(settings, wrappers_settings, episode_recording_settings, mocker):
 wrappers_settings_var_order = ["no_attack_buttons_combinations", "no_op_max", "sticky_actions", "frame_shape",
                                "reward_normalization", "reward_normalization_factor", "clip_rewards", "frame_stack", "dilation",
-                               "actions_stack", "scale", "scale_mod", "flatten", "filter_keys", "wrappers"]
+                               "actions_stack", "scale", "flatten", "filter_keys", "wrappers"]

 games_dict = available_games(False)

@@ -43,7 +43,6 @@
         "dilation": [1, 3],
         "actions_stack": [1, 6],
         "scale": [True, False],
-        "scale_mod": [0],
         "flatten": [True, False],
         "filter_keys": [[], ["stage", "own_side"]],
         "wrappers": [[]],
@@ -61,7 +60,6 @@
         "dilation": [0],
         "actions_stack": [-2],
         "scale": [10],
-        "scale_mod": [2],
         "flatten": [None],
         "filter_keys": [12],
         "wrappers": ["test"],
@@ -78,7 +76,7 @@ def pytest_generate_tests(metafunc):
 @pytest.mark.parametrize("action_space", [diambra.arena.SpaceType.DISCRETE, diambra.arena.SpaceType.MULTI_DISCRETE])
 def test_wrappers_settings(game_id, step_ratio, n_players, action_space, no_attack_buttons_combinations,
                            no_op_max, sticky_actions, frame_shape, reward_normalization, reward_normalization_factor,
-                           clip_rewards, frame_stack, dilation, actions_stack, scale, scale_mod,
+                           clip_rewards, frame_stack, dilation, actions_stack, scale,
                            flatten, filter_keys, wrappers, expected, mocker):

     # Env settings
@@ -103,7 +101,6 @@
     wrappers_settings["dilation"] = dilation
     wrappers_settings["actions_stack"] = actions_stack
     wrappers_settings["scale"] = scale
-    wrappers_settings["scale_mod"] = scale_mod
     wrappers_settings["flatten"] = flatten
     wrappers_settings["filter_keys"] = filter_keys
    wrappers_settings["wrappers"] = wrappers
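
Reviewer note, not part of the patch: a minimal sketch of how the refactored observation is consumed after this change. It assumes the usual diambra.arena.make() entry point and the "doapp" game id (neither is touched by this diff); only the observation/info layout reflects what the patch introduces in arena_gym.py, i.e. "common" RAM states ("stage", "timer") sit at the top level of the observation dict, player-specific states are nested under the "P1"/"P2" category names, and info keys carry the GameStates enum names.

import diambra.arena

# Assumed entry point and game id, used here only for illustration.
env = diambra.arena.make("doapp")
observation, info = env.reset(seed=42)

for _ in range(100):
    observation, reward, terminated, truncated, info = env.step(env.action_space.sample())

    stage, timer = observation["stage"], observation["timer"]  # common category -> top-level keys
    p1_health = observation["P1"]["health"]                     # player-specific categories nested under P1/P2
    p2_side = observation["P2"]["side"]

    if info["episode_done"]:                                     # GameStates enum name used as info key
        observation, info = env.reset()

env.close()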