From d08d480d5f840149cbc5fcdfaf1f1164be415efb Mon Sep 17 00:00:00 2001 From: Alessandro Palmas Date: Fri, 19 Apr 2024 21:56:27 -0400 Subject: [PATCH] Modifications completed, to be tested --- diambra/arena/arena_gym.py | 3 + diambra/arena/env_settings.py | 19 ++- diambra/arena/utils/gym_utils.py | 12 -- diambra/arena/utils/integratedGames.json | 111 ++++++++++++++++- diambra/arena/utils/policies.py | 151 ----------------------- 5 files changed, 126 insertions(+), 170 deletions(-) delete mode 100644 diambra/arena/utils/policies.py diff --git a/diambra/arena/arena_gym.py b/diambra/arena/arena_gym.py index 58473e66..bcd68bcb 100644 --- a/diambra/arena/arena_gym.py +++ b/diambra/arena/arena_gym.py @@ -155,6 +155,9 @@ def show_obs(self, observation, wait_key=1, viz=True, string="observation", key= if key.startswith("character"): char_idx = observation if type(observation) == int else np.where(observation == 1)[0][0] print(string + ": {} / {}".format(observation, self.env_info.characters_info.char_list[char_idx])) + elif key == "partner": + partner_idx = observation if type(observation) == int else np.where(observation == 1)[0][0] + print(string + ": {} / {}".format(observation, self.env_info.characters_info.partner_list[partner_idx])) else: print(string + ": {}".format(observation)) else: diff --git a/diambra/arena/env_settings.py b/diambra/arena/env_settings.py index b824e48f..3b561b3d 100644 --- a/diambra/arena/env_settings.py +++ b/diambra/arena/env_settings.py @@ -71,7 +71,7 @@ class EnvironmentSettingsBase: pb_model: model = None episode_settings = ["seed", "difficulty", "continue_game", "show_final", "tower", "role", - "characters", "outfits", "super_art", "fighting_style", "ultimate_style"] + "characters", "outfits", "super_art", "fighting_style", "ultimate_style", "speed_mode"] # Transforming env settings dict to pb request def get_pb_request(self, init=False): @@ -217,8 +217,9 @@ class EnvironmentSettings(EnvironmentSettingsBase): characters: Union[None, str, Tuple[str], Tuple[str, str], Tuple[str, str, str]] = None outfits: int = 1 super_art: Union[None, int] = None # SFIII Specific - fighting_style: Union[None, int] = None # KOF Specific + fighting_style: Union[None, int] = None # KOF, MVSC Specific ultimate_style: Union[None, Tuple[int, int, int]] = None # KOF Specific + speed_mode: Union[None, int] = None # MVSC, XMVSF Specific def _sanity_check(self): super()._sanity_check() @@ -245,6 +246,7 @@ def _sanity_check(self): if self.ultimate_style is not None: for idx in range(3): check_val_in_list("ultimate_style[{}]".format(idx), self.ultimate_style[idx], [1, 2]) + check_val_in_list("speed_mode", self.speed_mode, [None, 1, 2]) def _get_action_spaces(self): return [self.action_space] @@ -269,6 +271,8 @@ def _process_random_values(self): self.fighting_style = random.choice(list(range(1, 4))) if self.ultimate_style is None: self.ultimate_style = tuple([random.choice(list(range(1, 3))) for _ in range(3)]) + if self.speed_mode is None: + self.speed_mode = random.choice(list(range(1, 3))) def _get_player_specific_values(self): player_settings = model.EnvSettings.EpisodeSettings.PlayerSettings( @@ -277,7 +281,8 @@ def _get_player_specific_values(self): outfits=self.outfits, super_art=self.super_art, fighting_style=self.fighting_style, - ultimate_style={"dash": self.ultimate_style[0], "evade": self.ultimate_style[1], "bar": self.ultimate_style[2]} + ultimate_style={"dash": self.ultimate_style[0], "evade": self.ultimate_style[1], "bar": self.ultimate_style[2]}, + speed_mode=self.speed_mode, ) return [player_settings] @@ -296,8 +301,9 @@ class EnvironmentSettingsMultiAgent(EnvironmentSettingsBase): Tuple[Tuple[str, str, str], Tuple[str, str, str]]] = (None, None) outfits: Tuple[int, int] = (1, 1) super_art: Union[Tuple[None, None], Tuple[int, int]] = (None, None) # SFIII Specific - fighting_style: Union[Tuple[None, None], Tuple[int, int]] = (None, None) # KOF Specific + fighting_style: Union[Tuple[None, None], Tuple[int, int]] = (None, None) # KOF, MVSC Specific ultimate_style: Union[Tuple[None, None], Tuple[Tuple[int, int, int], Tuple[int, int, int]]] = (None, None) # KOF Specific + speed_mode: Union[Tuple[None, None], Tuple[int, int]] = (None, None) # MVSC, XMVSF Specific def _sanity_check(self): super()._sanity_check() @@ -329,6 +335,7 @@ def _sanity_check(self): if self.ultimate_style[idx] is not None: for jdx in range(3): check_val_in_list("ultimate_style[{}][{}]".format(idx, jdx), self.ultimate_style[idx][jdx], [1, 2]) + check_val_in_list("speed_mode[{}]".format(idx), self.speed_mode[idx], [None, 1, 2]) def _process_random_values(self): super()._process_random_values() @@ -358,6 +365,7 @@ def _process_random_values(self): self.super_art = tuple([random.choice(list(range(1, 4))) if self.super_art[idx] is None else self.super_art[idx] for idx in range(2)]) self.fighting_style = tuple([random.choice(list(range(1, 4))) if self.fighting_style[idx] is None else self.fighting_style[idx] for idx in range(2)]) self.ultimate_style = tuple([[random.choice(list(range(1, 3))) for _ in range(3)] if self.ultimate_style[idx] is None else self.ultimate_style[idx] for idx in range(2)]) + self.speed_mode = tuple([random.choice(list(range(1, 3))) if self.speed_mode[idx] is None else self.speed_mode[idx] for idx in range(2)]) def _get_action_spaces(self): return [action_space for action_space in self.action_space] @@ -372,7 +380,8 @@ def _get_player_specific_values(self): outfits=self.outfits[idx], super_art=self.super_art[idx], fighting_style=self.fighting_style[idx], - ultimate_style={"dash": self.ultimate_style[idx][0], "evade": self.ultimate_style[idx][1], "bar": self.ultimate_style[idx][2]} + ultimate_style={"dash": self.ultimate_style[idx][0], "evade": self.ultimate_style[idx][1], "bar": self.ultimate_style[idx][2]}, + speed_mode=self.speed_mode[idx], ) players_env_settings.append(player_settings) diff --git a/diambra/arena/utils/gym_utils.py b/diambra/arena/utils/gym_utils.py index 2a5b2954..6f9a69da 100644 --- a/diambra/arena/utils/gym_utils.py +++ b/diambra/arena/utils/gym_utils.py @@ -97,18 +97,6 @@ def gym_obs_dict_space_to_standard_dict(observation_space_dict): return standard_dict -# Utility to create a Gym compliant Dict Space from the InternalObsDict -def standard_dict_to_gym_obs_dict(obsstandard_dict): - - for k, v in obsstandard_dict.items(): - if isinstance(v, dict): - obsstandard_dict[k] = standard_dict_to_gym_obs_dict(v) - else: - obsstandard_dict[k] = v - - return spaces.Dict(obsstandard_dict) - - # Discrete to multidiscrete action conversion def discrete_to_multi_discrete_action(action, n_move_actions): diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json index cf31502c..98168ccb 100644 --- a/diambra/arena/utils/integratedGames.json +++ b/diambra/arena/utils/integratedGames.json @@ -357,7 +357,7 @@ "name": "Marvel VS Capcom", "id": "mvsc", "original_rom_name": "mvsc.zip", - "search_keywords": ["marvel vs capcom clash of super heroes", "marvel-vs.-capcom-clash-of-super-heroes", "5511", "wowroms"], + "search_keywords": ["marvel vs capcom clash of super heroes", "marvel-vs.-capcom-clash-of-super-heroes-euro-980123", "5511", "wowroms"], "notes": "Requires the QSound_HLE sound driver to be placed in the roms folder. Google `qsound_hle.zip` or `dl-1425.bin`", "nvram_save": "", "sha256": "6f63627cc37c554f74e8bf07b21730fa7f85511c7d5d07449850be98dde91da8", @@ -420,5 +420,112 @@ }, "cfg": {"WP": "But1", "MP": "But2", "SP": "But3", "WK": "But4", "MK": "But5", "SK": "But6"} }, - + "xmvsf": + { + "name": "X-Men VS Street Fighter", + "id": "xmvsf", + "original_rom_name": "xmvsf.zip", + "search_keywords": ["x-men vs street fighter", "x-men-vs.-street-fighter-usa-961004", "8769", "wowroms"], + "notes": "Requires the QSound_HLE sound driver to be placed in the roms folder. Google `qsound_hle.zip` or `dl-1425.bin`", + "nvram_save": "", + "sha256": "833aa46af63a3ad87f69ce2bacd85a4445f35a50e3aff4f793f069b205b51c60", + "char_list": ["Akuma", "Magneto", "Juggernaut", "Dhalsim", "Mr. Bison", + "Sabretooth", "Storm", "Chun-Li", "Zangief", "Gambit", + "Rogue", "Cammy", "Charlie", "Wolverine", "Cyclops", "Ryu", + "Ken", "Apocalypse", "Alpha Chun-Li"], + "char_forbidden_list": ["Apocalypse", "Alpha Chun-Li"], + "char_homonymy_map": {}, + "difficulty": [1, 8, 2], + "difficulty_to_cluster_map": { + "1": "Easy", + "2": "Easy", + "3": "Easy", + "4": "Easy", + "5": "Medium", + "6": "Medium", + "7": "Hard", + "8": "Hard" + }, + "cluster_to_difficulty_map": { + "Easy": 4, + "Medium": 6, + "Hard": 8 + }, + "rounds_per_stage": 1, + "stages_per_game": 8, + "number_of_chars_per_round": 2, + "number_of_chars_to_select": 2, + "n_actions": [9, 10, 7], + "health": [0, 144], + "frame_shape": [224, 384, 3], + "ram_states": { + "common": { + "stage": ["BOX", 1, 8], + "timer": ["BOX", 0, 99] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 1], + "character": ["DISCRETE", 0, 19], + "character_1": ["DISCRETE", 0, 19], + "character_2": ["DISCRETE", 0, 19], + "health_1": ["BOX", 0, 144], + "health_2": ["BOX", 0, 144], + "active_character": ["BINARY", 0, 1], + "super_bar": ["BOX", 0, 144], + "super_count": ["BOX", 0, 3] + } + }, + "cfg": {"WP": "But1", "MP": "But2", "SP": "But3", "WK": "But4", "MK": "But5", "SK": "But6"} + }, + "soulclbr": + { + "name": "Soul Calibur", + "id": "soulclbr", + "original_rom_name": "soulclbr.zip", + "search_keywords": ["soul calibur", "soul-calibur", "106959", "wowroms"], + "notes": "", + "nvram_save": "at28c16", + "sha256": "a07a1a19995d582b56f2865783c5d7adb7acb9a6ad995a26fc7c4cfecd821817", + "char_list": ["Xianghua", "Yoshimitsu", "Lizard Man", "Siegfried", + "Rock", "Seung Mina", "Edge Master", "Voldo", + "Ivy", "Sophitia", "Arthur", "Kilik", "Hwang", + "Maxi", "Nightmare", "Taki", "Astaroth", "Inferno"], + "char_forbidden_list": ["Inferno"], + "char_homonymy_map": {}, + "outfits": [1, 2], + "difficulty": [1, 5, 3], + "difficulty_to_cluster_map": { + "1": "Easy", + "2": "Easy", + "3": "Easy", + "4": "Medium", + "5": "Hard" + }, + "cluster_to_difficulty_map": { + "Easy": 3, + "Medium": 4, + "Hard": 5 + }, + "rounds_per_stage": 2, + "stages_per_game": 8, + "number_of_chars_per_round": 1, + "number_of_chars_to_select": 1, + "n_actions": [9, 10, 5], + "health": [0, 240], + "frame_shape": [240, 512, 3], + "ram_states": { + "common": { + "stage": ["BOX", 1, 8], + "timer": ["BOX", 0, 40] + }, + "Px": { + "side": ["BINARY", 0, 1], + "wins": ["BOX", 0, 2], + "character": ["DISCRETE", 0, 18], + "health": ["BOX", 0, 240] + } + }, + "cfg": {"HA": "But1", "VA": "But2", "K": "But3", "G": "But4"} + } } diff --git a/diambra/arena/utils/policies.py b/diambra/arena/utils/policies.py deleted file mode 100644 index add90aef..00000000 --- a/diambra/arena/utils/policies.py +++ /dev/null @@ -1,151 +0,0 @@ -# Collection of policies to be applied on the environment -import numpy as np -import random -import logging - -# No action policy - - -class NoActionPolicy(object): - - def __init__(self, name="No Action", action_space="multiDiscrete"): - self.id = "noAction" - self.name = name - self.action_space = action_space - - def initialize(self): - pass - - def reset(self, observation): - pass - - def act(self, observation, info=None): - - if self.action_space == "multiDiscrete": - prob = [1.0, 1.0] - action = [0, 0] - else: - prob = [1.0, 1.0] - action = 0 - - return action, prob - -# Random policy, sampling from the action space - - -class RandomPolicy(object): - - def __init__(self, n_actions, name="Random", action_space="multiDiscrete"): - self.n_actions = np.array(n_actions) - self.id = "random" - self.name = name - self.action_space = action_space - - def initialize(self): - pass - - def reset(self, observation): - pass - - def act(self, observation, info=None): - - prob = 1.0/self.n_actions - - if self.action_space == "discrete": - action = random.randrange(self.n_actions[0]+self.n_actions[1]-1) - else: - action = [] - for idx in range(self.n_actions.shape[0]): - action.append(random.randrange(self.n_actions[idx])) - - return action, prob - -# RL Policy, which uses a model to select the action - - -class RLPolicy(object): - - def __init__(self, model, deterministic_flag, - n_actions, name="Generic RL", action_space="multiDiscrete"): - - self.n_actions = n_actions - self.deterministic_flag = deterministic_flag - self.model = model - self.id = "rl" - self.name = name - self.action_space = action_space - self.logger = logging.getLogger(__name__) - - def initialize(self): - pass - - def reset(self, observation): - pass - - def update_weights(self, weights_path): - self.logger.debug("Loading new weights: {}".format(weights_path)) - self.model.load_parameters(weights_path) - - def act(self, observation, info=None): - action_prob = self.model.action_probability(observation) - - # if self.deterministic_flag: - # action = np.argmax(action_prob) - # else: - # if self.action_space == "discrete": - # action = np.random.choice([x for x in range(len(action_prob))], - # p=action_prob) - # else: - # action = self.model.predict(observation, - # deterministic=self.deterministic_flag) - action, _ = self.model.predict( - observation, deterministic=self.deterministic_flag) - action = action.tolist() - - prob = action_prob - if self.action_space == "discrete": - - if action >= self.n_actions[0]: - prob = [0.0, action_prob[action]] - else: - prob = [action_prob[action], 0.0] - else: - - prob = action_prob - # self.logger.error("Warning!! Probabilities for - # MultiDiscrete are not correct!") - # Can be ok doing nothing, but better to check - # raise Exception("To be checked") - - return action, prob - -# Human policy, retrieved via GamePad - - -class GamepadPolicy(object): - - def __init__(self, gamepad_class, name="Human"): - self.gamepad_class = gamepad_class - self.id = "gamepad" - self.initialized = False - self.name = name - - def initialize(self, action_list, gamepad_num=0): - if not self.initialized: - self.gamepad = self.gamepad_class(action_list=action_list, - gamepad_num=gamepad_num) - self.gamepad.start() - self.initialized = True - - def reset(self, observation): - pass - - def get_actions(self): - return self.gamepad.get_actions() - - def act(self, observation, info=None): - - prob = [1.0, 1.0] - action = self.gamepad.get_actions() - - return action, prob