From d08d480d5f840149cbc5fcdfaf1f1164be415efb Mon Sep 17 00:00:00 2001
From: Alessandro Palmas <alessandropalmas.mail@gmail.com>
Date: Fri, 19 Apr 2024 21:56:27 -0400
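Subject: [PATCH] Add xmvsf and soulclbr games, speed_mode setting and partner display; remove policies.py and standard_dict_to_gym_obs_dict (to be tested)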

---
 diambra/arena/arena_gym.py               |   3 +
 diambra/arena/env_settings.py            |  19 ++-
 diambra/arena/utils/gym_utils.py         |  12 --
 diambra/arena/utils/integratedGames.json | 111 ++++++++++++++++-
 diambra/arena/utils/policies.py          | 151 -----------------------
 5 files changed, 126 insertions(+), 170 deletions(-)
 delete mode 100644 diambra/arena/utils/policies.py

diff --git a/diambra/arena/arena_gym.py b/diambra/arena/arena_gym.py
index 58473e66..bcd68bcb 100644
--- a/diambra/arena/arena_gym.py
+++ b/diambra/arena/arena_gym.py
@@ -155,6 +155,9 @@ def show_obs(self, observation, wait_key=1, viz=True, string="observation", key=
                 if key.startswith("character"):
                     char_idx = observation if type(observation) == int else np.where(observation == 1)[0][0]
                     print(string + ": {} / {}".format(observation, self.env_info.characters_info.char_list[char_idx]))
+                elif key == "partner":
+                    partner_idx = observation if type(observation) == int else np.where(observation == 1)[0][0]
+                    print(string + ": {} / {}".format(observation, self.env_info.characters_info.partner_list[partner_idx]))
                 else:
                     print(string + ": {}".format(observation))
             else:
diff --git a/diambra/arena/env_settings.py b/diambra/arena/env_settings.py
index b824e48f..3b561b3d 100644
--- a/diambra/arena/env_settings.py
+++ b/diambra/arena/env_settings.py
@@ -71,7 +71,7 @@ class EnvironmentSettingsBase:
     pb_model: model = None
 
     episode_settings = ["seed", "difficulty", "continue_game", "show_final", "tower", "role",
-                        "characters", "outfits", "super_art", "fighting_style", "ultimate_style"]
+                        "characters", "outfits", "super_art", "fighting_style", "ultimate_style", "speed_mode"]
 
     # Transforming env settings dict to pb request
     def get_pb_request(self, init=False):
@@ -217,8 +217,9 @@ class EnvironmentSettings(EnvironmentSettingsBase):
     characters: Union[None, str, Tuple[str], Tuple[str, str], Tuple[str, str, str]] = None
     outfits: int = 1
     super_art: Union[None, int] = None  # SFIII Specific
-    fighting_style: Union[None, int] = None # KOF Specific
+    fighting_style: Union[None, int] = None # KOF, MVSC Specific
     ultimate_style: Union[None, Tuple[int, int, int]] = None # KOF Specific
+    speed_mode: Union[None, int] = None # MVSC, XMVSF Specific
 
     def _sanity_check(self):
         super()._sanity_check()
@@ -245,6 +246,7 @@ def _sanity_check(self):
         if self.ultimate_style is not None:
             for idx in range(3):
                 check_val_in_list("ultimate_style[{}]".format(idx), self.ultimate_style[idx], [1, 2])
+        check_val_in_list("speed_mode", self.speed_mode, [None, 1, 2])
 
     def _get_action_spaces(self):
         return [self.action_space]
@@ -269,6 +271,8 @@ def _process_random_values(self):
             self.fighting_style = random.choice(list(range(1, 4)))
         if self.ultimate_style is None:
             self.ultimate_style = tuple([random.choice(list(range(1, 3))) for _ in range(3)])
+        if self.speed_mode is None:
+            self.speed_mode = random.choice(list(range(1, 3)))
 
     def _get_player_specific_values(self):
         player_settings = model.EnvSettings.EpisodeSettings.PlayerSettings(
@@ -277,7 +281,8 @@ def _get_player_specific_values(self):
             outfits=self.outfits,
             super_art=self.super_art,
             fighting_style=self.fighting_style,
-            ultimate_style={"dash": self.ultimate_style[0], "evade": self.ultimate_style[1], "bar": self.ultimate_style[2]}
+            ultimate_style={"dash": self.ultimate_style[0], "evade": self.ultimate_style[1], "bar": self.ultimate_style[2]},
+            speed_mode=self.speed_mode,
         )
 
         return [player_settings]
@@ -296,8 +301,9 @@ class EnvironmentSettingsMultiAgent(EnvironmentSettingsBase):
                       Tuple[Tuple[str, str, str], Tuple[str, str, str]]] = (None, None)
     outfits: Tuple[int, int] = (1, 1)
     super_art: Union[Tuple[None, None], Tuple[int, int]] = (None, None)  # SFIII Specific
-    fighting_style: Union[Tuple[None, None], Tuple[int, int]] = (None, None)  # KOF Specific
+    fighting_style: Union[Tuple[None, None], Tuple[int, int]] = (None, None)  # KOF, MVSC Specific
     ultimate_style: Union[Tuple[None, None], Tuple[Tuple[int, int, int], Tuple[int, int, int]]] = (None, None)  # KOF Specific
+    speed_mode: Union[Tuple[None, None], Tuple[int, int]] = (None, None)  # MVSC, XMVSF Specific
 
     def _sanity_check(self):
         super()._sanity_check()
@@ -329,6 +335,7 @@ def _sanity_check(self):
             if self.ultimate_style[idx] is not None:
                 for jdx in range(3):
                     check_val_in_list("ultimate_style[{}][{}]".format(idx, jdx), self.ultimate_style[idx][jdx], [1, 2])
+            check_val_in_list("speed_mode[{}]".format(idx), self.speed_mode[idx], [None, 1, 2])
 
     def _process_random_values(self):
         super()._process_random_values()
@@ -358,6 +365,7 @@ def _process_random_values(self):
         self.super_art = tuple([random.choice(list(range(1, 4))) if self.super_art[idx] is None else self.super_art[idx] for idx in range(2)])
         self.fighting_style = tuple([random.choice(list(range(1, 4))) if self.fighting_style[idx] is None else self.fighting_style[idx] for idx in range(2)])
         self.ultimate_style = tuple([[random.choice(list(range(1, 3))) for _ in range(3)] if self.ultimate_style[idx] is None else self.ultimate_style[idx] for idx in range(2)])
+        self.speed_mode = tuple([random.choice(list(range(1, 3))) if self.speed_mode[idx] is None else self.speed_mode[idx] for idx in range(2)])
 
     def _get_action_spaces(self):
         return [action_space for action_space in self.action_space]
@@ -372,7 +380,8 @@ def _get_player_specific_values(self):
                 outfits=self.outfits[idx],
                 super_art=self.super_art[idx],
                 fighting_style=self.fighting_style[idx],
-                ultimate_style={"dash": self.ultimate_style[idx][0], "evade": self.ultimate_style[idx][1], "bar": self.ultimate_style[idx][2]}
+                ultimate_style={"dash": self.ultimate_style[idx][0], "evade": self.ultimate_style[idx][1], "bar": self.ultimate_style[idx][2]},
+                speed_mode=self.speed_mode[idx],
             )
 
             players_env_settings.append(player_settings)
diff --git a/diambra/arena/utils/gym_utils.py b/diambra/arena/utils/gym_utils.py
index 2a5b2954..6f9a69da 100644
--- a/diambra/arena/utils/gym_utils.py
+++ b/diambra/arena/utils/gym_utils.py
@@ -97,18 +97,6 @@ def gym_obs_dict_space_to_standard_dict(observation_space_dict):
 
     return standard_dict
 
-# Utility to create a Gym compliant Dict Space from the InternalObsDict
-def standard_dict_to_gym_obs_dict(obsstandard_dict):
-
-    for k, v in obsstandard_dict.items():
-        if isinstance(v, dict):
-            obsstandard_dict[k] = standard_dict_to_gym_obs_dict(v)
-        else:
-            obsstandard_dict[k] = v
-
-    return spaces.Dict(obsstandard_dict)
-
-
 # Discrete to multidiscrete action conversion
 def discrete_to_multi_discrete_action(action, n_move_actions):
 
diff --git a/diambra/arena/utils/integratedGames.json b/diambra/arena/utils/integratedGames.json
index cf31502c..98168ccb 100644
--- a/diambra/arena/utils/integratedGames.json
+++ b/diambra/arena/utils/integratedGames.json
@@ -357,7 +357,7 @@
             "name": "Marvel VS Capcom",
             "id": "mvsc",
             "original_rom_name": "mvsc.zip",
-            "search_keywords": ["marvel vs capcom clash of super heroes", "marvel-vs.-capcom-clash-of-super-heroes", "5511", "wowroms"],
+            "search_keywords": ["marvel vs capcom clash of super heroes", "marvel-vs.-capcom-clash-of-super-heroes-euro-980123", "5511", "wowroms"],
             "notes": "Requires the QSound_HLE sound driver to be placed in the roms folder. Google `qsound_hle.zip` or `dl-1425.bin`",
             "nvram_save": "",
             "sha256": "6f63627cc37c554f74e8bf07b21730fa7f85511c7d5d07449850be98dde91da8",
@@ -420,5 +420,112 @@
             },
             "cfg": {"WP": "But1", "MP": "But2", "SP": "But3", "WK": "But4", "MK": "But5", "SK": "But6"}
         },
-
+    "xmvsf":
+        {
+            "name": "X-Men VS Street Fighter",
+            "id": "xmvsf",
+            "original_rom_name": "xmvsf.zip",
+            "search_keywords": ["x-men vs street fighter", "x-men-vs.-street-fighter-usa-961004", "8769", "wowroms"],
+            "notes": "Requires the QSound_HLE sound driver to be placed in the roms folder. Google `qsound_hle.zip` or `dl-1425.bin`",
+            "nvram_save": "",
+            "sha256": "833aa46af63a3ad87f69ce2bacd85a4445f35a50e3aff4f793f069b205b51c60",
+            "char_list": ["Akuma", "Magneto", "Juggernaut", "Dhalsim", "Mr. Bison",
+                          "Sabretooth", "Storm", "Chun-Li", "Zangief", "Gambit",
+                          "Rogue", "Cammy", "Charlie", "Wolverine", "Cyclops", "Ryu",
+                          "Ken", "Apocalypse", "Alpha Chun-Li"],
+            "char_forbidden_list": ["Apocalypse", "Alpha Chun-Li"],
+            "char_homonymy_map": {},
+            "difficulty": [1, 8, 2],
+            "difficulty_to_cluster_map": {
+                "1": "Easy",
+                "2": "Easy",
+                "3": "Easy",
+                "4": "Easy",
+                "5": "Medium",
+                "6": "Medium",
+                "7": "Hard",
+                "8": "Hard"
+            },
+            "cluster_to_difficulty_map": {
+                "Easy": 4,
+                "Medium": 6,
+                "Hard": 8
+            },
+            "rounds_per_stage": 1,
+            "stages_per_game": 8,
+            "number_of_chars_per_round": 2,
+            "number_of_chars_to_select": 2,
+            "n_actions": [9, 10, 7],
+            "health": [0, 144],
+            "frame_shape": [224, 384, 3],
+            "ram_states": {
+                "common": {
+                    "stage": ["BOX", 1, 8],
+                    "timer": ["BOX", 0, 99]
+                },
+                "Px": {
+                    "side": ["BINARY", 0, 1],
+                    "wins": ["BOX", 0, 1],
+                    "character": ["DISCRETE", 0, 19],
+                    "character_1": ["DISCRETE", 0, 19],
+                    "character_2": ["DISCRETE", 0, 19],
+                    "health_1": ["BOX", 0, 144],
+                    "health_2": ["BOX", 0, 144],
+                    "active_character": ["BINARY", 0, 1],
+                    "super_bar": ["BOX", 0, 144],
+                    "super_count": ["BOX", 0, 3]
+                }
+            },
+            "cfg": {"WP": "But1", "MP": "But2", "SP": "But3", "WK": "But4", "MK": "But5", "SK": "But6"}
+        },
+    "soulclbr":
+        {
+            "name": "Soul Calibur",
+            "id": "soulclbr",
+            "original_rom_name": "soulclbr.zip",
+            "search_keywords": ["soul calibur", "soul-calibur", "106959", "wowroms"],
+            "notes": "",
+            "nvram_save": "at28c16",
+            "sha256": "a07a1a19995d582b56f2865783c5d7adb7acb9a6ad995a26fc7c4cfecd821817",
+            "char_list": ["Xianghua", "Yoshimitsu", "Lizard Man", "Siegfried",
+                          "Rock", "Seung Mina", "Edge Master", "Voldo",
+                          "Ivy", "Sophitia", "Arthur", "Kilik", "Hwang",
+                          "Maxi", "Nightmare", "Taki", "Astaroth", "Inferno"],
+            "char_forbidden_list": ["Inferno"],
+            "char_homonymy_map": {},
+            "outfits": [1, 2],
+            "difficulty": [1, 5, 3],
+            "difficulty_to_cluster_map": {
+                "1": "Easy",
+                "2": "Easy",
+                "3": "Easy",
+                "4": "Medium",
+                "5": "Hard"
+            },
+            "cluster_to_difficulty_map": {
+                "Easy": 3,
+                "Medium": 4,
+                "Hard": 5
+            },
+            "rounds_per_stage": 2,
+            "stages_per_game": 8,
+            "number_of_chars_per_round": 1,
+            "number_of_chars_to_select": 1,
+            "n_actions": [9, 10, 5],
+            "health": [0, 240],
+            "frame_shape": [240, 512, 3],
+            "ram_states": {
+                "common": {
+                    "stage": ["BOX", 1, 8],
+                    "timer": ["BOX", 0, 40]
+                },
+                "Px": {
+                    "side": ["BINARY", 0, 1],
+                    "wins": ["BOX", 0, 2],
+                    "character": ["DISCRETE", 0, 18],
+                    "health": ["BOX", 0, 240]
+                }
+            },
+            "cfg": {"HA": "But1", "VA": "But2", "K": "But3", "G": "But4"}
+        }
 }
diff --git a/diambra/arena/utils/policies.py b/diambra/arena/utils/policies.py
deleted file mode 100644
index add90aef..00000000
--- a/diambra/arena/utils/policies.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Collection of policies to be applied on the environment
-import numpy as np
-import random
-import logging
-
-# No action policy
-
-
-class NoActionPolicy(object):
-
-    def __init__(self, name="No Action", action_space="multiDiscrete"):
-        self.id = "noAction"
-        self.name = name
-        self.action_space = action_space
-
-    def initialize(self):
-        pass
-
-    def reset(self, observation):
-        pass
-
-    def act(self, observation, info=None):
-
-        if self.action_space == "multiDiscrete":
-            prob = [1.0, 1.0]
-            action = [0, 0]
-        else:
-            prob = [1.0, 1.0]
-            action = 0
-
-        return action, prob
-
-# Random policy, sampling from the action space
-
-
-class RandomPolicy(object):
-
-    def __init__(self, n_actions, name="Random", action_space="multiDiscrete"):
-        self.n_actions = np.array(n_actions)
-        self.id = "random"
-        self.name = name
-        self.action_space = action_space
-
-    def initialize(self):
-        pass
-
-    def reset(self, observation):
-        pass
-
-    def act(self, observation, info=None):
-
-        prob = 1.0/self.n_actions
-
-        if self.action_space == "discrete":
-            action = random.randrange(self.n_actions[0]+self.n_actions[1]-1)
-        else:
-            action = []
-            for idx in range(self.n_actions.shape[0]):
-                action.append(random.randrange(self.n_actions[idx]))
-
-        return action, prob
-
-# RL Policy, which uses a model to select the action
-
-
-class RLPolicy(object):
-
-    def __init__(self, model, deterministic_flag,
-                 n_actions, name="Generic RL", action_space="multiDiscrete"):
-
-        self.n_actions = n_actions
-        self.deterministic_flag = deterministic_flag
-        self.model = model
-        self.id = "rl"
-        self.name = name
-        self.action_space = action_space
-        self.logger = logging.getLogger(__name__)
-
-    def initialize(self):
-        pass
-
-    def reset(self, observation):
-        pass
-
-    def update_weights(self, weights_path):
-        self.logger.debug("Loading new weights: {}".format(weights_path))
-        self.model.load_parameters(weights_path)
-
-    def act(self, observation, info=None):
-        action_prob = self.model.action_probability(observation)
-
-        # if self.deterministic_flag:
-        #   action = np.argmax(action_prob)
-        # else:
-        #   if self.action_space == "discrete":
-        #       action = np.random.choice([x for x in range(len(action_prob))],
-        #                                 p=action_prob)
-        #   else:
-        #       action = self.model.predict(observation,
-        #                                   deterministic=self.deterministic_flag)
-        action, _ = self.model.predict(
-            observation, deterministic=self.deterministic_flag)
-        action = action.tolist()
-
-        prob = action_prob
-        if self.action_space == "discrete":
-
-            if action >= self.n_actions[0]:
-                prob = [0.0, action_prob[action]]
-            else:
-                prob = [action_prob[action], 0.0]
-        else:
-
-            prob = action_prob
-            # self.logger.error("Warning!! Probabilities for
-            #        MultiDiscrete are not correct!")
-            # Can be ok doing nothing, but better to check
-            # raise Exception("To be checked")
-
-        return action, prob
-
-# Human policy, retrieved via GamePad
-
-
-class GamepadPolicy(object):
-
-    def __init__(self, gamepad_class, name="Human"):
-        self.gamepad_class = gamepad_class
-        self.id = "gamepad"
-        self.initialized = False
-        self.name = name
-
-    def initialize(self, action_list, gamepad_num=0):
-        if not self.initialized:
-            self.gamepad = self.gamepad_class(action_list=action_list,
-                                              gamepad_num=gamepad_num)
-            self.gamepad.start()
-            self.initialized = True
-
-    def reset(self, observation):
-        pass
-
-    def get_actions(self):
-        return self.gamepad.get_actions()
-
-    def act(self, observation, info=None):
-
-        prob = [1.0, 1.0]
-        action = self.gamepad.get_actions()
-
-        return action, prob