diff --git a/examples/benchmarking/README.md b/examples/benchmarking/README.md index 9781448b5..72832d960 100644 --- a/examples/benchmarking/README.md +++ b/examples/benchmarking/README.md @@ -8,16 +8,17 @@ Code here is used to benchmark the performance of various simulators/benchmarks To benchmark ManiSkill + SAPIEN, after following the setup instructions on this repository's README.md, run ``` -python benchmark_gpu_sim.py -e "PickCube-v1" --num-envs=1024 --obs-mode=state # test just state simulation -python benchmark_gpu_sim.py -e "PickCube-v1" --num-envs=128 --obs-mode=rgbd # test state sim + parallel rendering one 128x128 RGBD cameras per environment -python benchmark_gpu_sim.py -e "PickCube-v1" --num-envs=128 --save-video # save a video showing all 128 visual observations +python benchmark_maniskill.py -e "PickCube-v1" --num-envs=1024 --obs-mode=state # test just state simulation +python benchmark_maniskill.py -e "PickCube-v1" --num-envs=128 --obs-mode=rgbd # test state sim + parallel rendering one 128x128 RGBD cameras per environment +python benchmark_maniskill.py -e "PickCube-v1" --num-envs=128 --save-video # save a video showing all 128 visual observations ``` -To get the best reported results, we run two commands on a machine with a RTX 4090: +To get the reported results, we run two commands on a machine with a RTX 4090: ``` -python benchmark_gpu_sim.py -e "PickCube-v1" --num-envs=4096 --obs-mode=state -python benchmark_gpu_sim.py -e "PickCube-v1" --num-envs=1536 --obs-mode=rgbd +python benchmark_maniskill.py -e "PickCube-v1" --num-envs=4096 --obs-mode=state --control-freq=50 +python benchmark_maniskill.py -e "PickCube-v1" --num-envs=1536 --obs-mode=rgbd --control-freq=50 +# note we use --control-freq=50 as this is the control frequency isaac sim based repos tend to use ``` These are the expected state-based only results: diff --git a/examples/benchmarking/benchmark_cpu_sim.py b/examples/benchmarking/benchmark_cpu_sim.py deleted file mode 100644 index b87b0f05f..000000000 --- a/examples/benchmarking/benchmark_cpu_sim.py +++ /dev/null @@ -1,44 +0,0 @@ -import time - -import gymnasium as gym -import numpy as np -import sapien -import sapien.physx -import sapien.render -import tqdm - -import mani_skill2.envs - -if __name__ == "__main__": - num_envs = 12 - env_id = "PickCube-v0" - env = gym.make_vec( - env_id, - num_envs, - vectorization_mode="async", - vector_kwargs=dict(context="forkserver"), - ) - np.random.seed(2022) - env.reset(seed=2022) - print("GPU Simulation Enabled:", sapien.physx.is_gpu_enabled()) - N = 1000 - stime = time.time() - for i in tqdm.tqdm(range(N)): - env.step(env.action_space.sample()) - dtime = time.time() - stime - FPS = num_envs * N / dtime - print(f"{FPS=:0.3f}. {N=} frames in {dtime:0.3f}s with {num_envs} parallel envs") - N = 1000 - stime = time.time() - for i in tqdm.tqdm(range(N)): - actions = env.action_space.sample() - obs, rew, terminated, truncated, info = env.step(actions) - if i % 200 == 0 and i != 0: - env.reset() - print("RESET") - dtime = time.time() - stime - FPS = num_envs * N / dtime - print( - f"{FPS=:0.3f}. 
{N=} frames in {dtime:0.3f}s with {num_envs} parallel envs with step+reset" - ) - env.close() diff --git a/examples/benchmarking/benchmark_gpu_sim.py b/examples/benchmarking/benchmark_maniskill.py similarity index 74% rename from examples/benchmarking/benchmark_gpu_sim.py rename to examples/benchmarking/benchmark_maniskill.py index 7a379c9d7..6a1e3060a 100644 --- a/examples/benchmarking/benchmark_gpu_sim.py +++ b/examples/benchmarking/benchmark_maniskill.py @@ -12,6 +12,10 @@ import tqdm import mani_skill2.envs +from mani_skill2.envs.scenes.tasks.planner.planner import PickSubtask +from mani_skill2.envs.scenes.tasks.sequential_task import SequentialTaskEnv +from mani_skill2.utils.scene_builder.ai2thor.variants import ArchitecTHORSceneBuilder +from mani_skill2.utils.scene_builder.replicacad.scene_builder import ReplicaCADSceneBuilder from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv from profiling import Profiler from mani_skill2.utils.visualization.misc import images_to_video, tile_images @@ -21,20 +25,30 @@ def main(args): profiler = Profiler(output_format=args.format) num_envs = args.num_envs - env = gym.make( - args.env_id, - num_envs=num_envs, - obs_mode=args.obs_mode, - # enable_shadow=True, - render_mode=args.render_mode, - control_mode=args.control_mode, - sim_cfg=dict(control_freq=50) - ) - if isinstance(env.action_space, gym.spaces.Dict): - env = FlattenActionSpaceWrapper(env) - env = ManiSkillVectorEnv(env) + sim_cfg = dict() + if args.control_freq: + sim_cfg["control_freq"] = args.control_freq + if args.sim_freq: + sim_cfg["sim_freq"] = args.sim_freq + if not args.cpu_sim: + env = gym.make( + args.env_id, + num_envs=num_envs, + obs_mode=args.obs_mode, + # enable_shadow=True, + render_mode=args.render_mode, + control_mode=args.control_mode, + sim_cfg=sim_cfg + ) + if isinstance(env.action_space, gym.spaces.Dict): + env = FlattenActionSpaceWrapper(env) + env = ManiSkillVectorEnv(env) + base_env = env.base_env + else: + env = gym.make_vec(args.env_id, num_envs=args.num_envs, vectorization_mode="async", vector_kwargs=dict(context="spawn"), obs_mode=args.obs_mode,) + base_env = gym.make(args.env_id, obs_mode=args.obs_mode).unwrapped sensor_settings_str = [] - for uid, cam in env.base_env._sensors.items(): + for uid, cam in base_env._sensors.items(): cfg = cam.cfg sensor_settings_str.append(f"{cfg.width}x{cfg.height}") sensor_settings_str = "_".join(sensor_settings_str) @@ -51,10 +65,10 @@ def main(args): f"render_mode={args.render_mode}, sensor_details={sensor_settings_str}, save_video={args.save_video}" ) print( - f"sim_freq={env.base_env.sim_freq}, control_freq={env.base_env.control_freq}" + f"sim_freq={base_env.sim_freq}, control_freq={base_env.control_freq}" ) print(f"observation space: {env.observation_space}") - print(f"action space: {env.base_env.single_action_space}") + print(f"action space: {base_env.single_action_space}") print( "# -------------------------------------------------------------------------- #" ) @@ -70,7 +84,7 @@ def main(args): with profiler.profile("env.step", total_steps=N, num_envs=num_envs): for i in range(N): actions = ( - 2 * torch.rand(env.action_space.shape, device=env.base_env.device) + 2 * torch.rand(env.action_space.shape, device=base_env.device) - 1 ) obs, rew, terminated, truncated, info = env.step(actions) @@ -126,6 +140,9 @@ def parse_args(): parser.add_argument("-o", "--obs-mode", type=str, default="state") parser.add_argument("-c", "--control-mode", type=str, default="pd_joint_delta_pos") parser.add_argument("-n", 
"--num-envs", type=int, default=1024) + parser.add_argument("--cpu-sim", action="store_true", help="Whether to use the CPU or GPU simulation") + parser.add_argument("--control-freq", type=int, default=None, help="The control frequency to use") + parser.add_argument("--sim-freq", type=int, default=None, help="The simulation frequency to use") parser.add_argument( "--render-mode", type=str, diff --git a/mani_skill2/__init__.py b/mani_skill2/__init__.py index da500af2e..49b8a1fe6 100644 --- a/mani_skill2/__init__.py +++ b/mani_skill2/__init__.py @@ -9,7 +9,12 @@ PACKAGE_DIR = Path(__file__).parent.resolve() PACKAGE_ASSET_DIR = PACKAGE_DIR / "assets" # Non-package data -ASSET_DIR = Path(os.getenv("MS2_ASSET_DIR", "data")) +ASSET_DIR = Path( + os.getenv("MS_ASSET_DIR", os.path.join(os.path.expanduser("~"), ".maniskill/data")) +) +DEMO_DIR = Path( + os.getenv("MS_ASSET_DIR", os.path.join(os.path.expanduser("~"), ".maniskill/demos")) +) def format_path(p: str): diff --git a/mani_skill2/agents/robots/fetch/fetch.py b/mani_skill2/agents/robots/fetch/fetch.py index c95753e74..d2d6dcdf8 100644 --- a/mani_skill2/agents/robots/fetch/fetch.py +++ b/mani_skill2/agents/robots/fetch/fetch.py @@ -51,13 +51,25 @@ class Fetch(BaseAgent): q=[1, 0, 0, 0], width=128, height=128, - fov=1.57, + fov=2, near=0.01, far=10, entity_uid="head_camera_link", + ), + CameraConfig( + uid="fetch_hand", + p=[-0.1, 0, 0.1], + q=[1, 0, 0, 0], + width=128, + height=128, + fov=2, + near=0.01, + far=10, + entity_uid="gripper_link", ) ] REACHABLE_DIST = 1.5 + RESTING_QPOS = np.array([0, 0, 0, 0.386, 0, -0.370, 0.562, -1.032, 0.695, 0.955, -0.1, 2.077, 0, 0.015, 0.015]) def __init__(self, *args, **kwargs): self.arm_joint_names = [ @@ -204,14 +216,14 @@ def controller_configs(self): # -------------------------------------------------------------------------- # # Body # -------------------------------------------------------------------------- # - body_pd_joint_pos = PDJointPosControllerConfig( + body_pd_joint_delta_pos = PDJointPosControllerConfig( self.body_joint_names, - [-1.57, -1.6056, 0], - [1.57, 1.6056, 0.38615], + -0.1, + 0.1, self.body_stiffness, self.body_damping, self.body_force_limit, - normalize_action=True, + use_delta=True, ) # -------------------------------------------------------------------------- # @@ -229,69 +241,69 @@ def controller_configs(self): pd_joint_delta_pos=dict( arm=arm_pd_joint_delta_pos, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_joint_pos=dict( arm=arm_pd_joint_pos, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_ee_delta_pos=dict( arm=arm_pd_ee_delta_pos, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_ee_delta_pose=dict( arm=arm_pd_ee_delta_pose, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_ee_delta_pose_align=dict( arm=arm_pd_ee_delta_pose_align, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), # TODO(jigu): how to add boundaries for the following controllers pd_joint_target_delta_pos=dict( arm=arm_pd_joint_target_delta_pos, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_ee_target_delta_pos=dict( arm=arm_pd_ee_target_delta_pos, gripper=gripper_pd_joint_pos, - 
body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_ee_target_delta_pose=dict( arm=arm_pd_ee_target_delta_pose, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), # Caution to use the following controllers pd_joint_vel=dict( arm=arm_pd_joint_vel, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_joint_pos_vel=dict( arm=arm_pd_joint_pos_vel, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), pd_joint_delta_pos_vel=dict( arm=arm_pd_joint_delta_pos_vel, gripper=gripper_pd_joint_pos, - body=body_pd_joint_pos, + body=body_pd_joint_delta_pos, base=base_pd_joint_vel, ), ) @@ -315,7 +327,7 @@ def _after_init(self): self.r_wheel_link: Link = get_obj_by_name( self.robot.get_links(), "r_wheel_link" ) - for link in [self.base_link, self.l_wheel_link, self.r_wheel_link]: + for link in [self.l_wheel_link, self.r_wheel_link]: for body in link._bodies: cs = body.get_collision_shapes()[0] cg = cs.get_collision_groups() @@ -326,8 +338,8 @@ def _after_init(self): self.robot.get_links(), "torso_lift_link" ) - self.torso_lift_link: Link = get_obj_by_name( - self.robot.get_links(), "torso_lift_link" + self.head_camera_link: Link = get_obj_by_name( + self.robot.get_links(), "head_camera_link" ) self.queries: Dict[ @@ -419,7 +431,7 @@ def is_grasping(self, object: Actor = None, min_impulse=1e-6, max_angle=85): and np.rad2deg(rangle) <= max_angle ) - return all([lflag, rflag]) + return torch.tensor([all([lflag, rflag])], dtype=bool) def is_static(self, threshold: float = 0.2): qvel = self.robot.get_qvel()[..., :-2] diff --git a/mani_skill2/assets/robots/fetch/fetch.urdf b/mani_skill2/assets/robots/fetch/fetch.urdf index a629eb952..dddf70f78 100644 --- a/mani_skill2/assets/robots/fetch/fetch.urdf +++ b/mani_skill2/assets/robots/fetch/fetch.urdf @@ -22,14 +22,14 @@ - + - + diff --git a/mani_skill2/envs/__init__.py b/mani_skill2/envs/__init__.py index 6e73415d0..e9c81a4a5 100644 --- a/mani_skill2/envs/__init__.py +++ b/mani_skill2/envs/__init__.py @@ -1,4 +1,4 @@ # from .ms1 import * from .ms2 import * -from .scenes.pick_object import * +from .scenes import * from .tasks import * diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py index 04895748c..f1912793d 100644 --- a/mani_skill2/envs/sapien_env.py +++ b/mani_skill2/envs/sapien_env.py @@ -965,6 +965,10 @@ def render_human(self): if self._viewer is None: self._viewer = Viewer() self._setup_viewer() + if "render_camera" in self._human_render_cameras: + self._viewer.set_camera_pose( + self._human_render_cameras["render_camera"].camera.global_pose + ) for obj in self._hidden_objects: obj.show_visual() diff --git a/mani_skill2/envs/scenes/__init__.py b/mani_skill2/envs/scenes/__init__.py new file mode 100644 index 000000000..1c69ac41f --- /dev/null +++ b/mani_skill2/envs/scenes/__init__.py @@ -0,0 +1,28 @@ +from mani_skill2.utils.registration import register_env +from mani_skill2.utils.scene_builder.ai2thor.variants import ( + ArchitecTHORSceneBuilder, + ProcTHORSceneBuilder, + RoboTHORSceneBuilder, + iTHORSceneBuilder, +) +from mani_skill2.utils.scene_builder.replicacad.scene_builder import ( + ReplicaCADSceneBuilder, +) + +from .base_env import SceneManipulationEnv + +scene_builders = { + "ReplicaCAD": ReplicaCADSceneBuilder, + "ArchitecTHOR": ArchitecTHORSceneBuilder, + "ProcTHOR": ProcTHORSceneBuilder, + "RoboTHOR": 
RoboTHORSceneBuilder, + "iTHOR": iTHORSceneBuilder, +} + +# Register environments just for benchmarking/exploration and to be creatable by just ID, these don't have any specific tasks designed in them. +for k, scene_builder in scene_builders.items(): + register_env( + f"{k}_SceneManipulation-v1", + max_episode_steps=None, + scene_builder_cls=scene_builder, + )(SceneManipulationEnv) diff --git a/mani_skill2/envs/scenes/base_env.py b/mani_skill2/envs/scenes/base_env.py index 96311f264..aeea02c40 100644 --- a/mani_skill2/envs/scenes/base_env.py +++ b/mani_skill2/envs/scenes/base_env.py @@ -1,25 +1,29 @@ +from typing import Any, Dict, Union + import numpy as np import sapien as sapien import sapien.physx as physx +import torch from sapien import Pose -from mani_skill2.agents.robots import Panda +from mani_skill2.agents.robots import Fetch, Panda from mani_skill2.envs.sapien_env import BaseEnv from mani_skill2.sensors.camera import CameraConfig +from mani_skill2.utils.registration import register_env from mani_skill2.utils.sapien_utils import look_at from mani_skill2.utils.scene_builder import SceneBuilder -from mani_skill2.utils.scene_builder.ai2thor import ( - ArchitecTHORSceneBuilder, - ProcTHORSceneBuilder, - RoboTHORSceneBuilder, - iTHORSceneBuilder, +from mani_skill2.utils.scene_builder.replicacad.scene_builder import ( + ReplicaCADSceneBuilder, ) -from mani_skill2.utils.structs.pose import vectorize_pose +from mani_skill2.utils.structs.types import GPUMemoryConfig, SimConfig +@register_env("SceneManipulation-v1", max_episode_steps=200) class SceneManipulationEnv(BaseEnv): - agent: Panda """ + A base environment for simulating manipulation tasks in more complex scenes. Creating this environment is only useful for explorations/visualization, there are no success/failure + metrics or rewards + Args: robot_uids: Which robot to place into the scene. Default is "panda" @@ -32,85 +36,121 @@ class SceneManipulationEnv(BaseEnv): convex_decomposition: Choice of convex decomposition algorithm to generate collision meshes for objects. 
Default is `coacd` which uses https://github.com/SarahWeiii/CoACD """ + SUPPORTED_ROBOTS = ["panda", "fetch"] + sim_cfg = SimConfig( + spacing=50, + gpu_memory_cfg=GPUMemoryConfig( + found_lost_pairs_capacity=2**25, + max_rigid_patch_count=2**19, + max_rigid_contact_count=2**21, + ), + ) + agent: Union[Panda, Fetch] + def __init__( self, *args, - robot_uids="panda", + robot_uids="fetch", robot_init_qpos_noise=0.02, fixed_scene=True, - scene_builder_cls: SceneBuilder = ArchitecTHORSceneBuilder, + scene_builder_cls: SceneBuilder = ReplicaCADSceneBuilder, convex_decomposition="coacd", + scene_idxs=None, **kwargs ): self.robot_init_qpos_noise = robot_init_qpos_noise self.fixed_scene = fixed_scene self.sampled_scene_idx: int = None - self.scene_builder = scene_builder_cls() - self.scene_ids = np.arange(0, len(self.scene_builder.scene_configs)) + self.scene_builder: SceneBuilder = scene_builder_cls( + self, robot_init_qpos_noise=robot_init_qpos_noise + ) + if isinstance(scene_idxs, int): + self.scene_idxs = [scene_idxs] + elif isinstance(scene_idxs, list): + self.scene_idxs = scene_idxs + else: + self.scene_idxs = np.arange(0, len(self.scene_builder.scene_configs)) self.convex_decomposition = convex_decomposition super().__init__(*args, robot_uids=robot_uids, **kwargs) def reset(self, seed=None, options=None): self._set_episode_rng(seed) if options is None: - options = dict(reconfigure=True) + options = dict(reconfigure=False) if not self.fixed_scene: options["reconfigure"] = True - if options["reconfigure"]: - self.sampled_scene_idx = self._episode_rng.randint(0, len(self.scene_ids)) + if "reconfigure" in options and options["reconfigure"]: + self.sampled_scene_idx = self.scene_idxs[ + self._episode_rng.randint(0, len(self.scene_idxs)) + ] + self.sampled_scene_idx = int(self.sampled_scene_idx) return super().reset(seed, options) + def _setup_lighting(self): + if self.scene_builder.builds_lighting: + return + return super()._setup_lighting() + def _load_actors(self): self.scene_builder.build( self._scene, - scene_id=self.sampled_scene_idx, + scene_idx=self.sampled_scene_idx, convex_decomposition=self.convex_decomposition, ) - def _initialize_agent(self): - if self.robot_uids == "panda": - # fmt: off - # EE at [0.615, 0, 0.17] - qpos = np.array( - [0.0, np.pi / 8, 0, -np.pi * 5 / 8, 0, np.pi * 3 / 4, np.pi / 4, 0.04, 0.04] - ) - # fmt: on - qpos[:-2] += self._episode_rng.normal( - 0, self.robot_init_qpos_noise, len(qpos) - 2 - ) - self.agent.reset(qpos) - self.agent.robot.set_pose(Pose([-0.615, 0, 0])) - elif self.robot_uids == "xmate3_robotiq": - qpos = np.array( - [0, np.pi / 6, 0, np.pi / 3, 0, np.pi / 2, -np.pi / 2, 0, 0] - ) - qpos[:-2] += self._episode_rng.normal( - 0, self.robot_init_qpos_noise, len(qpos) - 2 - ) - self.agent.reset(qpos) - self.agent.robot.set_pose(Pose([-0.562, 0, 0])) - else: - raise NotImplementedError(self.robot_uids) + def _initialize_actors(self, env_idx: torch.Tensor): + with torch.device(self.device): + self.scene_builder.initialize(env_idx) + + def evaluate(self) -> dict: + return dict() + + def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict): + return 0 + + def compute_normalized_dense_reward( + self, obs: Any, action: torch.Tensor, info: Dict + ): + return self.compute_dense_reward(obs=obs, action=action, info=info) / 1 def _register_sensors(self): + if self.robot_uids == "fetch": + return () + pose = look_at([0.3, 0, 0.6], [-0.1, 0, 0.1]) return CameraConfig( "base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10 ) def 
_register_human_render_cameras(self): + if self.robot_uids == "fetch": + room_camera_pose = look_at([2.5, -2.5, 3], [0.0, 0.0, 0]) + room_camera_config = CameraConfig( + "render_camera", + room_camera_pose.p, + room_camera_pose.q, + 512, + 512, + 1, + 0.01, + 10, + ) + robot_camera_pose = look_at([2, 0, 1], [0, 0, -1]) + robot_camera_config = CameraConfig( + "robot_render_camera", + robot_camera_pose.p, + robot_camera_pose.q, + 512, + 512, + 1.5, + 0.01, + 10, + link=self.agent.torso_lift_link, + ) + return [room_camera_config, robot_camera_config] + if self.robot_uids == "panda": pose = look_at([0.4, 0.4, 0.8], [0.0, 0.0, 0.4]) else: - pose = look_at([0.5, 0.5, 1.0], [0.0, 0.0, 0.5]) + pose = look_at([0, 10, -3], [0, 0, 0]) return CameraConfig("render_camera", pose.p, pose.q, 512, 512, 1, 0.01, 10) - - def _setup_viewer(self): - super()._setup_viewer() - self._viewer.set_camera_xyz(0.8, 0, 1.0) - self._viewer.set_camera_rpy(0, -0.5, 3.14) - - def _get_obs_agent(self): - obs = self.agent.get_proprioception() - obs["base_pose"] = vectorize_pose(self.agent.robot.pose) - return obs diff --git a/mani_skill2/envs/scenes/pick_object.py b/mani_skill2/envs/scenes/pick_object.py deleted file mode 100644 index 9b2f0c4ea..000000000 --- a/mani_skill2/envs/scenes/pick_object.py +++ /dev/null @@ -1,59 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import sapien -import sapien.physx as physx - -from mani_skill2.utils.registration import register_env -from mani_skill2.utils.structs.pose import vectorize_pose - -from .base_env import SceneManipulationEnv - - -@register_env("PickObjectScene-v0", max_episode_steps=200) -class PickObjectSceneEnv(SceneManipulationEnv): - """ - Args: - - """ - - def __init__(self, *args, **kwargs): - self.box_length = 0.1 - super().__init__(*args, **kwargs) - - def _load_actors(self): - super()._load_actors() - - def reconfigure(self): - super().reconfigure() - self.init_state = self.get_state() - - def _get_obs_extra(self) -> OrderedDict: - obs = OrderedDict( - tcp_pose=vectorize_pose(self.agent.tcp.pose), - obj_pose=vectorize_pose(self.goal_obj.pose), - ) - return obs - - def evaluate(self, **kwargs): - return dict(success=self.agent.is_grasping(self.goal_obj)) - - def compute_dense_reward(self, info, **kwargs): - return 1 - np.tanh(np.linalg.norm(self.goal_obj.pose.p - self.agent.tcp.pose.p)) - - def compute_normalized_dense_reward(self, **kwargs): - return self.compute_dense_reward(**kwargs) / 1 - - def _initialize_actors(self): - self.set_state(self.init_state) - - def _initialize_task(self): - # pick a random goal object to pick up - self.goal_obj: sapien.Entity = self._episode_rng.choice( - self.scene_builder.movable_objects - ) - print(f"Target Object: {self.goal_obj.name}") - - def reset(self, seed=None, options=None): - # sample a scene - return super().reset(seed, options) diff --git a/mani_skill2/examples/demo_random_action.py b/mani_skill2/examples/demo_random_action.py index 02750dd83..177f2d4e2 100644 --- a/mani_skill2/examples/demo_random_action.py +++ b/mani_skill2/examples/demo_random_action.py @@ -78,8 +78,9 @@ def main(args): if args.render_mode is not None: env.render() - if terminated or truncated: - break + if args.render_mode is None or args.render_mode != "human": + if terminated or truncated: + break env.close() diff --git a/mani_skill2/utils/building/actors.py b/mani_skill2/utils/building/actors.py index 04bb8337d..528401d1f 100644 --- a/mani_skill2/utils/building/actors.py +++ b/mani_skill2/utils/building/actors.py @@ -262,7 
+262,7 @@ def build_actor_ycb( model_id: str, scene: ManiSkillScene, name: str, - root_dir=ASSET_DIR / "mani_skill2_ycb", + root_dir=ASSET_DIR / "assets/mani_skill2_ycb", body_type: str = "dynamic", add_collision: bool = True, return_builder: bool = False, @@ -299,7 +299,7 @@ def build_actor_ycb( def _load_ycb_dataset(): # load YCB if used MODEL_DBS["YCB"] = { - "model_data": load_json(ASSET_DIR / "mani_skill2_ycb/info_pick_v0.json"), + "model_data": load_json(ASSET_DIR / "assets/mani_skill2_ycb/info_pick_v0.json"), "builder": build_actor_ycb, } diff --git a/mani_skill2/utils/building/articulation_builder.py b/mani_skill2/utils/building/articulation_builder.py index ed717cb3f..55c7987b8 100644 --- a/mani_skill2/utils/building/articulation_builder.py +++ b/mani_skill2/utils/building/articulation_builder.py @@ -49,7 +49,6 @@ def create_link_builder(self, parent: LinkBuilder = None): builder = LinkBuilder(len(self.link_builders), parent) self.link_builders.append(builder) if self.disable_self_collisions: - # NOTE (stao): Currently in SAPIEN you can't set collision groups after building the links due to some bug when doing GPU sim. builder.collision_groups[2] |= 1 << 29 return builder diff --git a/mani_skill2/utils/building/urdf_loader.py b/mani_skill2/utils/building/urdf_loader.py index cc1112888..d8732ebf2 100644 --- a/mani_skill2/utils/building/urdf_loader.py +++ b/mani_skill2/utils/building/urdf_loader.py @@ -16,7 +16,7 @@ class URDFLoader(SapienURDFLoader): scene: ManiSkillScene - name: str + name: str = None disable_self_collisions: bool = False def parse( diff --git a/mani_skill2/utils/download_asset.py b/mani_skill2/utils/download_asset.py index b7b5300fc..215c3b460 100644 --- a/mani_skill2/utils/download_asset.py +++ b/mani_skill2/utils/download_asset.py @@ -11,57 +11,86 @@ import shutil import urllib.request import zipfile +from dataclasses import dataclass from pathlib import Path +from typing import Dict from urllib.error import URLError +from huggingface_hub import login, snapshot_download from tqdm.auto import tqdm from mani_skill2 import ASSET_DIR, PACKAGE_ASSET_DIR from mani_skill2.utils.io_utils import load_json -DATA_SOURCES = {} + +@dataclass +class DataSource: + url: str = None + hf_repo_id: str = None + target_path: str = None + checksum: str = None + output_dir: str = ASSET_DIR + + +DATA_SOURCES: Dict[str, DataSource] = {} DATA_GROUPS = {} def initialize_sources(): - DATA_SOURCES["ycb"] = dict( + """ + Initialize the metadata for assets + + Note that the current organization works as follows + + - assets/* contain files for individual objects (.obj, .glb etc.) and articulations (.urdf etc.) that are generally reused. + E.g. Partnet Mobility and the YCB dataset. + - tasks/* contain files that are otherwise too big to upload to GitHub and are relevant for just that one task and will generally not be reused + - scene_datasets/* is a bit messier but contains a self-contained folder for each scene dataset each of which is likely organized differently. + These datasets often put their unique object and articulation files together with scene configuration data. In the future we will re-organize these + datasets so that all objects are put into assets and leave scene_datasets for scene configuration information. + - robots/* contains files for additional robots that are not included by default. 
+ + """ + + # TODO add google scanned objects + DATA_SOURCES["ycb"] = DataSource( url="https://huggingface.co/datasets/haosulab/ManiSkill2/resolve/main/data/mani_skill2_ycb.zip", - target_path="mani_skill2_ycb", + target_path="assets/mani_skill2_ycb", checksum="174001ba1003cc0c5adda6453f4433f55ec7e804f0f0da22d015d525d02262fb", ) DATA_GROUPS["PickSingleYCB-v0"] = ["ycb"] - DATA_SOURCES["pick_clutter_ycb"] = dict( + DATA_SOURCES["pick_clutter_ycb"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/pick_clutter/ycb_train_5k.json.gz", - output_dir=ASSET_DIR / "pick_clutter", + target_path="tasks/pick_clutter", checksum="70ec176c7036f326ea7813b77f8c03bea9db5960198498957a49b2895a9ec338", ) DATA_GROUPS["PickClutterYCB-v0"] = ["ycb", "pick_clutter_ycb"] - DATA_SOURCES["assembling_kits"] = dict( + DATA_SOURCES["assembling_kits"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/assembling_kits_v1.zip", - target_path="assembling_kits", + target_path="tasks/assembling_kits", checksum="e3371f17a07a012edaa3a0b3604fb1577f3fb921876c3d5ed59733dd75a6b4a0", ) DATA_GROUPS["AssemblingKits-v0"] = ["assembling_kits"] - DATA_SOURCES["panda_avoid_obstacles"] = dict( + DATA_SOURCES["panda_avoid_obstacles"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/avoid_obstacles/panda_train_2k.json.gz", - output_dir=ASSET_DIR / "avoid_obstacles", + target_path="tasks/avoid_obstacles", checksum="44dae9a0804172515c290c1f49a1e7e72d76e40201a2c5c7d4a3ccd43b4d5be4", ) DATA_GROUPS["PandaAvoidObstacles-v0"] = ["panda_avoid_obstacles"] - DATA_SOURCES["pinch"] = dict( + DATA_SOURCES["pinch"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/pinch.zip", - target_path="pinch", + target_path="tasks/pinch", checksum="3281d2d777fad42e6d37371b2d3ee16fb1c39984907176718ca2e4f447326fe7", ) DATA_GROUPS["Pinch-v0"] = ["pinch"] - DATA_SOURCES["write"] = dict( + DATA_SOURCES["write"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/write.zip", - target_path="write", + target_path="tasks/write", checksum="c5b49e581bfed9cfb2107a607faf52795f840e93f5a7ad389290314513b4b634", ) DATA_GROUPS["Write-v0"] = ["write"] @@ -78,7 +107,7 @@ def initialize_sources(): category_uids[category] = [] for model_id in model_ids: uid = f"partnet_mobility/{model_id}" - DATA_SOURCES[uid] = dict( + DATA_SOURCES[uid] = DataSource( url=f"https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/partnet_mobility/dataset/{model_id}.zip", output_dir=ASSET_DIR / "partnet_mobility" / "dataset", ) @@ -97,9 +126,17 @@ def initialize_sources(): DATA_GROUPS["MoveBucket-v1"] = category_uids["bucket"] DATA_GROUPS["TurnFaucet-v0"] = category_uids["faucet"] + # ---------------------------------------------------------------------------- # + # Interactable Scene Datasets + # ---------------------------------------------------------------------------- # + DATA_SOURCES["ReplicaCAD"] = DataSource( + hf_repo_id="haosulab/ReplicaCAD", + target_path="scene_datasets/replica_cad_dataset", + ) + def initialize_extra_sources(): - DATA_SOURCES["xmate3_robotiq"] = dict( + DATA_SOURCES["xmate3_robotiq"] = DataSource( url="https://storage1.ucsd.edu/datasets/ManiSkill2022-assets/xmate3_robotiq.zip", target_path="robots/xmate3_robotiq", checksum="ddda102a20eb41e28a0a501702e240e5d7f4084221a44f580e729f08b7c12d1a", @@ -108,17 +145,12 @@ def initialize_extra_sources(): # ---------------------------------------------------------------------------- # # Visual backgrounds # 
---------------------------------------------------------------------------- # - DATA_SOURCES["minimalistic_modern_bedroom"] = dict( - url="https://storage1.ucsd.edu/datasets/ManiSkill-background/minimalistic_modern_bedroom.glb", - output_dir=ASSET_DIR / "background", - checksum="9d9ea14c8cdfab1ebafe3e8ff5a071b77b361c1abd24e9c59b0f08e1d1a3421a", - ) - # Alias - DATA_GROUPS["minimal_bedroom"] = ["minimalistic_modern_bedroom"] # All backgrounds DATA_GROUPS["backgrounds"] = ["minimalistic_modern_bedroom"] + # TODO add Replica, MatterPort 3D? + def prompt_yes_no(message): r"""Prints a message and prompts the user for "y" or "n" returning True or False.""" @@ -148,15 +180,26 @@ def sha256sum(filename, chunk_size=4096): return sha256_hash.hexdigest() +def download_from_hf_datasets( + data_source: DataSource, +): + output_dir = Path(data_source.output_dir) + output_path = output_dir / data_source.target_path + snapshot_download( + repo_id=data_source.hf_repo_id, + repo_type="dataset", + local_dir=output_path, + local_dir_use_symlinks=False, + resume_download=True, + ) + + def download( - url, - output_dir=ASSET_DIR, - target_path: str = None, - checksum=None, + data_source: DataSource, verbose=True, non_interactive=True, ): - output_dir = Path(output_dir) + output_dir = Path(data_source.output_dir) # Create output directory if not output_dir.exists(): @@ -164,9 +207,9 @@ def download( output_dir.mkdir(parents=True) else: return - + target_path = data_source.target_path if target_path is None: - target_path = url.split("/")[-1] + target_path = data_source.url.split("/")[-1] output_path = output_dir / target_path output_dir = osp.dirname(output_path) @@ -184,10 +227,14 @@ def download( print(f"Skip existing: {output_path}") return output_path + if data_source.hf_repo_id is not None: + download_from_hf_datasets(data_source) + return + # Download files to temporary location try: if verbose: - print(f"Downloading {url}") + print(f"Downloading {data_source.url}") pbar = tqdm(unit="iB", unit_scale=True, unit_divisor=1024) def show_progress(blocknum, bs, size): @@ -196,19 +243,22 @@ def show_progress(blocknum, bs, size): pbar.total = size pbar.update(bs) - filename, _ = urllib.request.urlretrieve(url, reporthook=show_progress) + filename, _ = urllib.request.urlretrieve( + data_source.url, reporthook=show_progress + ) if verbose: pbar.close() except URLError as err: - print(f"Failed to download {url}") + print(f"Failed to download {data_source.url}") raise err # Verify checksum - if checksum is not None and checksum != sha256sum(filename): - raise IOError(f"Downloaded file's SHA-256 hash does not match record: {url}") - + if data_source.checksum is not None and data_source.checksum != sha256sum(filename): + raise IOError( + f"Downloaded file's SHA-256 hash does not match record: {data_source.url}" + ) # Extract or move to output path - if url.endswith(".zip"): + if data_source.url.endswith(".zip"): with zipfile.ZipFile(filename, "r") as zip_ref: if verbose: for file in tqdm(zip_ref.infolist()): @@ -271,12 +321,12 @@ def main(args): if show_progress and verbose: print("Downloading assets for {}: {}/{}".format(args.uid, i + 1, len(uids))) - kwargs = DATA_SOURCES[uid].copy() + kwargs = dict() kwargs["verbose"] = verbose kwargs["non_interactive"] = args.non_interactive if args.output_dir is not None: kwargs["output_dir"] = args.output_dir - output_path = download(**kwargs) + output_path = download(DATA_SOURCES[uid], **kwargs) if output_path is not None and verbose: print("=" * 80) diff --git 
a/mani_skill2/utils/download_demo.py b/mani_skill2/utils/download_demo.py index 99a821c24..2ef801321 100644 --- a/mani_skill2/utils/download_demo.py +++ b/mani_skill2/utils/download_demo.py @@ -6,6 +6,8 @@ from tqdm import tqdm +from mani_skill2 import DEMO_DIR + DATASET_SOURCES = {} # Rigid body envs @@ -208,8 +210,7 @@ def parse_args(args=None): "-o", "--output_dir", type=str, - default="demos", - help="The directory to save demonstrations to. The files will then be saved to //", + help="The directory to save demonstrations to. The files will then be saved to //. By default it is saved to ~/.maniskill/demos or what MS_ASSET_DIR is set to.", ) return parser.parse_args(args) @@ -244,23 +245,23 @@ def main(args): raise ValueError( f"Version v{download_version} of demonstrations for {uid} do not exist. Latest version is v{meta['latest_version']}. If you think this is a bug please raise an issue on GitHub: https://github.com/haosulab/ManiSkill2/issues" ) + + object_paths = meta["object_paths"] + output_dir = str(DEMO_DIR) + if args.output_dir: + output_dir = args.output_dir + final_path = osp.join(output_dir, f"v{download_version}", meta["env_type"]) if verbose: print( - f"Downloading v{download_version} demonstrations: {i+1}/{len(uids)}, {uid}" + f"Downloading v{download_version} demonstrations to {final_path} - {i+1}/{len(uids)}, {uid}" ) - - object_paths = meta["object_paths"] for object_path in object_paths: local_path = download_file( - osp.join(args.output_dir), object_path, version=download_version + output_dir, object_path, version=download_version ) if osp.splitext(local_path)[1] == ".zip": with zipfile.ZipFile(local_path, "r") as zip_ref: - zip_ref.extractall( - osp.join( - args.output_dir, f"v{download_version}", meta["env_type"] - ) - ) + zip_ref.extractall(final_path) os.remove(local_path) diff --git a/mani_skill2/utils/scene_builder/ai2thor/constants.py b/mani_skill2/utils/scene_builder/ai2thor/constants.py index 89b1a632a..6e0fde2e5 100644 --- a/mani_skill2/utils/scene_builder/ai2thor/constants.py +++ b/mani_skill2/utils/scene_builder/ai2thor/constants.py @@ -11,6 +11,7 @@ class SceneConfig: config_file: str source: str + spawn_pos_file: str = None @dataclass diff --git a/mani_skill2/utils/scene_builder/ai2thor/scene_builder.py b/mani_skill2/utils/scene_builder/ai2thor/scene_builder.py index ce7989e15..5bc028203 100644 --- a/mani_skill2/utils/scene_builder/ai2thor/scene_builder.py +++ b/mani_skill2/utils/scene_builder/ai2thor/scene_builder.py @@ -7,7 +7,7 @@ import json import os.path as osp from pathlib import Path -from typing import List +from typing import Dict, List, Tuple import numpy as np import sapien @@ -17,7 +17,10 @@ from tqdm import tqdm from mani_skill2 import ASSET_DIR +from mani_skill2.agents.robots.fetch import FETCH_UNIQUE_COLLISION_BIT, Fetch +from mani_skill2.envs.scene import ManiSkillScene from mani_skill2.utils.scene_builder import SceneBuilder +from mani_skill2.utils.structs.actor import Actor from .constants import SCENE_SOURCE_TO_DATASET, SceneConfig, load_ai2thor_metadata @@ -33,6 +36,9 @@ None, ) +# TODO (arth): fix coacd so this isn't necessary +WORKING_OBJS = ["apple", "potato", "tomato"] + class AI2THORBaseSceneBuilder(SceneBuilder): """ @@ -40,8 +46,11 @@ class AI2THORBaseSceneBuilder(SceneBuilder): """ scene_dataset: str + robot_init_qpos_noise: float = 0.02 - def __init__(self): + def __init__(self, env, robot_init_qpos_noise=0.02): + self.env = env + self.robot_init_qpos_noise = robot_init_qpos_noise global OBJECT_SEMANTIC_ID_MAPPING, 
SEMANTIC_ID_OBJECT_MAPPING, MOVEABLE_OBJECT_IDS ( OBJECT_SEMANTIC_ID_MAPPING, @@ -61,7 +70,10 @@ def __init__(self): else: self._scene_configs = ALL_SCENE_CONFIGS[self.scene_dataset] - super().__init__() + self._scene_navigable_positions = [None] * len(self._scene_configs) + self.actor_default_poses: List[Tuple[Actor, sapien.Pose]] = [] + + self.scene_idx = None def _should_be_kinematic(self, template_name: str): object_config_json = ( @@ -75,14 +87,16 @@ def _should_be_kinematic(self, template_name: str): object_category = SEMANTIC_ID_OBJECT_MAPPING[semantic_id] return object_category not in MOVEABLE_OBJECT_IDS + # TODO (arth): figure out coacd building issues (currenlty fails on > 80% objs) def build( - self, scene: sapien.Scene, scene_id=0, convex_decomposition="none", **kwargs + self, scene: ManiSkillScene, scene_idx=0, convex_decomposition="none", **kwargs ): # save scene and movable objects when building scene - self._scene_objects: List[sapien.Entity] = [] - self._movable_objects: List[sapien.Entity] = [] + self._scene_objects: Dict[str, Actor] = dict() + self._movable_objects: Dict[str, Actor] = dict() + self.scene_idx = scene_idx - scene_cfg = self._scene_configs[scene_id] + scene_cfg = self._scene_configs[scene_idx] dataset = SCENE_SOURCE_TO_DATASET[scene_cfg.source] with open(osp.join(dataset.dataset_path, scene_cfg.config_file), "rb") as f: @@ -94,21 +108,22 @@ def build( / f"{scene_json['stage_instance']['template_name']}.glb" ) builder = scene.create_actor_builder() - builder.add_visual_from_file(bg_path) - builder.add_nonconvex_collision_from_file(bg_path) - bg = builder.build_kinematic(name="scene_background") - q = transforms3d.quaternions.axangle2quat( + + bg_q = transforms3d.quaternions.axangle2quat( np.array([1, 0, 0]), theta=np.deg2rad(90) ) if self.scene_dataset == "ProcTHOR": # for some reason the scene needs to rotate around y-axis by 90 degrees for ProcTHOR scenes from hssd dataset - q = transforms3d.quaternions.qmult( + bg_q = transforms3d.quaternions.qmult( q, transforms3d.quaternions.axangle2quat( np.array([0, -1, 0]), theta=np.deg2rad(90) ), ) - bg.set_pose(sapien.Pose(q=q)) + bg_pose = sapien.Pose(q=bg_q) + builder.add_visual_from_file(bg_path, pose=bg_pose) + builder.add_nonconvex_collision_from_file(bg_path, pose=bg_pose) + self.bg = builder.build_static(name="scene_background") global_id = 0 for object in tqdm(scene_json["object_instances"][:]): @@ -119,58 +134,127 @@ def build( ) actor_id = f"{object['template_name']}_{global_id}" global_id += 1 + q = transforms3d.quaternions.axangle2quat( + np.array([1, 0, 0]), theta=np.deg2rad(90) + ) + rot_q = [ + object["rotation"][0], + object["rotation"][1], + object["rotation"][2], + object["rotation"][-1], + ] + q = transforms3d.quaternions.qmult(q, rot_q) + builder = scene.create_actor_builder() - builder.add_visual_from_file(str(model_path)) - if self._should_be_kinematic(object["template_name"]): + if self._should_be_kinematic(object["template_name"]) or not np.any( + [name in actor_id.lower() for name in WORKING_OBJS] + ): + position = [ + object["translation"][0], + -object["translation"][2], + object["translation"][1] + 0, + ] position = [ object["translation"][0], -object["translation"][2], object["translation"][1] + 0, ] - builder.add_nonconvex_collision_from_file(str(model_path)) - actor = builder.build_kinematic(name=actor_id) + pose = sapien.Pose(p=position, q=q) + builder.add_visual_from_file(str(model_path), pose=pose) + builder.add_nonconvex_collision_from_file(str(model_path), pose=pose) + actor = 
builder.build_static(name=actor_id) else: position = [ object["translation"][0], -object["translation"][2], object["translation"][1] + 0.005, ] + builder.add_visual_from_file(str(model_path)) builder.add_multiple_convex_collisions_from_file( - str(model_path), decomposition=convex_decomposition + str(model_path), + decomposition=convex_decomposition, ) actor = builder.build(name=actor_id) - self._scene_objects.append(actor) + self._movable_objects[actor.name] = actor + pose = sapien.Pose(p=position, q=q) + self.actor_default_poses.append((actor, pose)) + self._scene_objects[actor_id] = actor - q = transforms3d.quaternions.axangle2quat( - np.array([1, 0, 0]), theta=np.deg2rad(90) + if self._scene_navigable_positions[scene_idx] is None: + self._scene_navigable_positions[scene_idx] = np.load( + Path(dataset.dataset_path) + / ( + Path(scene_cfg.config_file).stem.split(".")[0] + + f".{self.env.robot_uids}.navigable_positions.npy" + ) ) - rot_q = [ - object["rotation"][0], - object["rotation"][1], - object["rotation"][2], - object["rotation"][-1], - ] - q = transforms3d.quaternions.qmult(q, rot_q) - pose = sapien.Pose(p=position, q=q) + + def disable_fetch_ground_collisions(self): + # TODO (stao) (arth): is there a better way to model robots in sim. This feels very unintuitive. + for body in self.bg._bodies: + cs = body.get_collision_shapes()[0] + cg = cs.get_collision_groups() + cg[2] |= FETCH_UNIQUE_COLLISION_BIT + cs.set_collision_groups(cg) + + def set_actor_default_poses_vels(self): + for actor, pose in self.actor_default_poses: actor.set_pose(pose) + actor.set_linear_velocity([0, 0, 0]) + actor.set_angular_velocity([0, 0, 0]) + + def initialize(self, env_idx): - # get movable objects - self._movable_objects = [ - obj - for obj in self.scene_objects - if not obj.find_component_by_type( - physx.PhysxRigidDynamicComponent - ).kinematic - ] + self.set_actor_default_poses_vels() + + if self.env.robot_uids == "panda": + # fmt: off + # EE at [0.615, 0, 0.17] + qpos = np.array( + [0.0, np.pi / 8, 0, -np.pi * 5 / 8, 0, np.pi * 3 / 4, np.pi / 4, 0.04, 0.04] + ) + # fmt: on + qpos[:-2] += self.env._episode_rng.normal( + 0, self.robot_init_qpos_noise, len(qpos) - 2 + ) + self.env.agent.reset(qpos) + self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0])) + elif self.env.robot_uids == "xmate3_robotiq": + qpos = np.array( + [0, np.pi / 6, 0, np.pi / 3, 0, np.pi / 2, -np.pi / 2, 0, 0] + ) + qpos[:-2] += self.env._episode_rng.normal( + 0, self.robot_init_qpos_noise, len(qpos) - 2 + ) + self.env.agent.reset(qpos) + self.env.agent.robot.set_pose(sapien.Pose([-0.562, 0, 0])) + elif self.env.robot_uids == "fetch": + agent: Fetch = self.env.agent + agent.reset(agent.RESTING_QPOS) + agent.robot.set_pose(sapien.Pose([*self.navigable_positions[0], 0.001])) + self.disable_fetch_ground_collisions() + else: + raise NotImplementedError(self.env.robot_uids) @property - def scene_configs(self): + def scene_configs(self) -> List[SceneConfig]: return self._scene_configs @property - def scene_objects(self): - return self._scene_objects + def navigable_positions(self) -> np.ndarray: + assert isinstance( + self.scene_idx, int + ), "Must build scene before getting navigable positions" + return self._scene_navigable_positions[self.scene_idx] + + @property + def scene_objects(self) -> List[Actor]: + return list(self._scene_objects.values()) + + @property + def movable_objects(self) -> List[Actor]: + return list(self._movable_objects.values()) @property - def movable_objects(self): + def movable_objects_by_id(self) -> 
Dict[str, Actor]: return self._movable_objects diff --git a/mani_skill2/utils/scene_builder/replicacad/metadata/scene_configs.json b/mani_skill2/utils/scene_builder/replicacad/metadata/scene_configs.json new file mode 100644 index 000000000..173a38ebe --- /dev/null +++ b/mani_skill2/utils/scene_builder/replicacad/metadata/scene_configs.json @@ -0,0 +1,10 @@ +{ + "scenes": [ + "apt_0.scene_instance.json", + "apt_1.scene_instance.json", + "apt_2.scene_instance.json", + "apt_3.scene_instance.json", + "apt_4.scene_instance.json", + "apt_5.scene_instance.json" + ] +} diff --git a/mani_skill2/utils/scene_builder/replicacad/scene_builder.py b/mani_skill2/utils/scene_builder/replicacad/scene_builder.py new file mode 100644 index 000000000..864b10d41 --- /dev/null +++ b/mani_skill2/utils/scene_builder/replicacad/scene_builder.py @@ -0,0 +1,215 @@ +""" +Code for building scenes from the ReplicaCAD dataset https://aihabitat.org/datasets/replica_cad/ + +This code is also heavily commented to serve as a tutorial for how to build custom scenes from scratch and/or port scenes over from other datasets/simulators +""" + +import json +import os.path as osp + +import numpy as np +import sapien +import torch +import transforms3d + +from mani_skill2 import ASSET_DIR +from mani_skill2.agents.robots.fetch.fetch import FETCH_UNIQUE_COLLISION_BIT, Fetch +from mani_skill2.envs.scene import ManiSkillScene +from mani_skill2.utils.scene_builder import SceneBuilder +from mani_skill2.utils.structs.articulation import Articulation + +DATASET_CONFIG_DIR = osp.join(osp.dirname(__file__), "metadata") + + +class ReplicaCADSceneBuilder(SceneBuilder): + builds_lighting = True + + def __init__(self, env, robot_init_qpos_noise=0.02): + super().__init__(env, robot_init_qpos_noise) + # Scene datasets from any source generally have several configurations, each of which may involve changing object geometries, poses etc. + # You should store this configuration information in the self._scene_configs list, which permits the code to sample from when + # simulating more than one scene or performing reconfiguration + + # for ReplicaCAD we have saved the list of all scene configuration files from the dataset to a local json file and create SceneConfig objects out of it + with open(osp.join(DATASET_CONFIG_DIR, "scene_configs.json")) as f: + self.scene_configs = json.load(f)["scenes"] + + def build( + self, scene: ManiSkillScene, scene_idx=0, convex_decomposition="none", **kwargs + ): + """ + Given a ManiSkillScene, a sampled scene_idx, build/load the scene objects + + scene_idx is an index corresponding to a sampled scene config in self._scene_configs. The code should... + TODO (stao): scene_idx should probably be replaced with scene config? + + TODO (stao): provide a simple way in maybe SceneBuilder to override how to decide if an object should be dynamic or not? + """ + scene_cfg_path = self.scene_configs[scene_idx] + + # We read the json config file describing the scene setup for the selected ReplicaCAD scene + with open( + osp.join( + ASSET_DIR, + "scene_datasets/replica_cad_dataset/configs/scenes", + scene_cfg_path, + ), + "rb", + ) as f: + scene_json = json.load(f) + + # The complex part of porting over scene datasets is that each scene dataset often has it's own format and there is no + # one size fits all solution to read that format and use it. 
The best way to port a scene dataset over is to look + # at the configuration files, get a sense of the pattern and find how they reference .glb model files and potentially + # decomposed convex meshes for physical simulation + + # ReplicaCAD stores the background model here + background_template_name = osp.basename( + scene_json["stage_instance"]["template_name"] + ) + bg_path = str( + ASSET_DIR + / f"scene_datasets/replica_cad_dataset/stages/{background_template_name}.glb" + ) + builder = scene.create_actor_builder() + # Note all ReplicaCAD assets are rotated by 90 degrees as they use a different xyz convention to SAPIEN/ManiSkill. + q = transforms3d.quaternions.axangle2quat( + np.array([1, 0, 0]), theta=np.deg2rad(90) + ) + bg_pose = sapien.Pose(q=q) + + # When creating objects that do not need to be moved ever, you must provide the pose of the object ahead of time + # and use builder.build_static. Objects such as the scene background (also called a stage) fits in this category + builder.add_visual_from_file(bg_path, pose=bg_pose) + builder.add_nonconvex_collision_from_file(bg_path, pose=bg_pose) + self.bg = builder.build_static(name="scene_background") + + # For the purposes of physical simulation, we disable collisions between the Fetch robot and the scene background + self.disable_fetch_ground_collisions() + + # In scenes, there will always be dynamic objects, kinematic objects, and static objects. + # In the case of ReplicaCAD there are only dynamic and static objects. Since dynamic objects can be moved during simulation + # we need to keep track of the initial poses of each dynamic actor we create. + self.default_object_poses = [] + for obj_meta in scene_json["object_instances"]: + + # Again, for any dataset you will have to figure out how they reference object files + # Note that ASSET_DIR will always refer to the ~/.ms_data folder or whatever MS_ASSET_DIR is set to + obj_cfg_path = osp.join( + ASSET_DIR, + "scene_datasets/replica_cad_dataset/configs/objects", + f"{osp.basename(obj_meta['template_name'])}.object_config.json", + ) + with open(obj_cfg_path) as f: + obj_cfg = json.load(f) + visual_file = osp.join(osp.dirname(obj_cfg_path), obj_cfg["render_asset"]) + if "collision_asset" in obj_cfg: + collision_file = osp.join( + osp.dirname(obj_cfg_path), obj_cfg["collision_asset"] + ) + builder = scene.create_actor_builder() + pos = obj_meta["translation"] + rot = obj_meta["rotation"] + # left multiplying by the offset quaternion we used for the stage/scene background as all assets in ReplicaCAD are rotated by 90 degrees + pose = sapien.Pose(q=q) * sapien.Pose(pos, rot) + + # Neatly for simulation, ReplicaCAD specifies if an object is meant to be simulated as dynamic (can be moved like pots) or static (must stay still, like kitchen counters) + if obj_meta["motion_type"] == "DYNAMIC": + builder.add_visual_from_file(visual_file) + if ( + "use_bounding_box_for_collision" in obj_cfg + and obj_cfg["use_bounding_box_for_collision"] + ): + # some dynamic objects do not have decomposed convex meshes and instead should use a simple bounding box for collision detection + # in this case we use the add_convex_collision_from_file function of SAPIEN which just creates a convex collision based on the visual mesh + builder.add_convex_collision_from_file(visual_file) + else: + builder.add_multiple_convex_collisions_from_file(collision_file) + actor = builder.build(name=obj_meta["template_name"]) + self.default_object_poses.append( + (actor, pose * sapien.Pose(p=[0, 0, 0.0])) + ) + elif 
obj_meta["motion_type"] == "STATIC": + builder.add_visual_from_file(visual_file, pose=pose) + # for static (and dynamic) objects you don't need to use pre convex decomposed meshes and instead can directly + # add the non convex collision mesh based on the visual mesh + builder.add_nonconvex_collision_from_file(visual_file, pose=pose) + actor = builder.build_static(name=obj_meta["template_name"]) + + # ReplicaCAD also provides articulated objects + for articulated_meta in scene_json["articulated_object_instances"]: + + template_name = articulated_meta["template_name"] + pos = articulated_meta["translation"] + rot = articulated_meta["rotation"] + # articulated_cfg_path = osp.join( + # ASSET_DIR, + # "scene_datasets/replica_cad_dataset/urdf/objects", + # f"{osp.basename(obj_meta['template_name'])}.object_config.json", + # ) + urdf_path = osp.join( + ASSET_DIR, + f"scene_datasets/replica_cad_dataset/urdf/{template_name}/{template_name}.urdf", + ) + urdf_loader = scene.create_urdf_loader() + urdf_loader.name = template_name + urdf_loader.fix_root_link = articulated_meta["fixed_base"] + urdf_loader.disable_self_collisions = True + if "uniform_scale" in articulated_meta: + urdf_loader.scale = articulated_meta["uniform_scale"] + articulation = urdf_loader.load(urdf_path) + pose = sapien.Pose(q=q) * sapien.Pose(pos, rot) + self.default_object_poses.append((articulation, pose)) + + # ReplicaCAD also specifies where to put lighting + with open( + osp.join( + ASSET_DIR, + "scene_datasets/replica_cad_dataset/configs/lighting", + f"{osp.basename(scene_json['default_lighting'])}.lighting_config.json", + ) + ) as f: + lighting_cfg = json.load(f) + for light_cfg in lighting_cfg["lights"].values(): + # It appears ReplicaCAD only specifies point light sources so we only use those here + if light_cfg["type"] == "point": + light_pos_fixed = ( + sapien.Pose(q=q) * sapien.Pose(p=light_cfg["position"]) + ).p + # In SAPIEN, one can set color to unbounded values, higher just means more intense. ReplicaCAD provides color and intensity separately so + # we multiply it together here. We also take absolute value of intensity since some scene configs write negative intensities (which result in black holes) + scene.add_point_light( + light_pos_fixed, + color=np.array(light_cfg["color"]) * np.abs(light_cfg["intensity"]), + ) + scene.set_ambient_light([0.3, 0.3, 0.3]) + + def initialize(self, env_idx): + if self.env.robot_uids == "fetch": + agent: Fetch = self.env.agent + agent.reset(agent.RESTING_QPOS) + + # set robot to be inside the small room in the middle + # agent.robot.set_pose(sapien.Pose([0.8, 1.9, 0.001])) + # qpos = agent.robot.qpos + # qpos[:, 2] = 2.9 + # agent.robot.set_qpos(qpos) + + agent.robot.set_pose(sapien.Pose([-0.8, -1, 0.001])) + + else: + raise NotImplementedError(self.env.robot_uids) + for obj, pose in self.default_object_poses: + # TODO (stao): It's not super clear if sleeping objects works on GPU but appears to improve FPS a little. + obj.set_pose(pose) + if isinstance(obj, Articulation): + obj.set_qpos(obj.qpos * 0) + # TODO (stao): settle objects for a few steps then save poses again on first run? + + def disable_fetch_ground_collisions(self): + # TODO (stao) (arth): is there a better way to model robots in sim. This feels very unintuitive. 
+ for body in self.bg._bodies: + cs = body.get_collision_shapes()[0] + cg = cs.get_collision_groups() + cg[2] |= FETCH_UNIQUE_COLLISION_BIT + cs.set_collision_groups(cg) diff --git a/mani_skill2/utils/scene_builder/scene_builder.py b/mani_skill2/utils/scene_builder/scene_builder.py index 14578116b..d952460d2 100644 --- a/mani_skill2/utils/scene_builder/scene_builder.py +++ b/mani_skill2/utils/scene_builder/scene_builder.py @@ -1,18 +1,23 @@ -from dataclasses import dataclass, field -from typing import List +from typing import Any, Dict, List import sapien from mani_skill2.envs.sapien_env import BaseEnv -from mani_skill2.envs.scene import ManiSkillScene from mani_skill2.utils.structs.actor import Actor -@dataclass class SceneBuilder: env: BaseEnv - _scene_objects: List[Actor] = field(default_factory=list) - _movable_objects: List[Actor] = field(default_factory=list) + _scene_objects: List[Actor] = [] + _movable_objects: List[Actor] = [] + builds_lighting: bool = False + """Whether this scene builder will add it's own lighting when build is called. If False, ManiSkill will add some default lighting""" + scene_configs: List[Any] = None + """List of scene configuration information that can be used to construct scenes. Can be simply a path to a json file or a dictionary""" + + def __init__(self, env, robot_init_qpos_noise=0.02): + self.env = env + self.robot_init_qpos_noise = robot_init_qpos_noise def build(self, **kwargs): """ @@ -31,9 +36,17 @@ def scene(self): return self.env._scene @property - def scene_objects(self): + def scene_objects(self) -> List[Actor]: + raise NotImplementedError() + + @property + def movable_objects(self) -> List[Actor]: + raise NotImplementedError() + + @property + def scene_objects_by_id(self) -> Dict[str, Actor]: raise NotImplementedError() @property - def movable_objects(self): + def movable_objects_by_id(self) -> Dict[str, Actor]: raise NotImplementedError() diff --git a/mani_skill2/utils/scene_builder/table/table_scene_builder.py b/mani_skill2/utils/scene_builder/table/table_scene_builder.py index 000ed5d2d..45bcd0662 100644 --- a/mani_skill2/utils/scene_builder/table/table_scene_builder.py +++ b/mani_skill2/utils/scene_builder/table/table_scene_builder.py @@ -1,5 +1,4 @@ import os.path as osp -from dataclasses import dataclass from pathlib import Path from typing import List @@ -9,11 +8,11 @@ from transforms3d.euler import euler2quat from mani_skill2.agents.multi_agent import MultiAgent +from mani_skill2.agents.robots.fetch import FETCH_UNIQUE_COLLISION_BIT from mani_skill2.utils.building.ground import build_ground from mani_skill2.utils.scene_builder import SceneBuilder -@dataclass class TableSceneBuilder(SceneBuilder): robot_init_qpos_noise: float = 0.02 @@ -113,16 +112,14 @@ def initialize(self): self.env.agent.reset(qpos) self.env.agent.robot.set_pose(sapien.Pose([-1.05, 0, -self.table_height])) - from mani_skill2.agents.robots.fetch import FETCH_UNIQUE_COLLISION_BIT - - cs = ( - self.ground._objs[0] - .find_component_by_type(sapien.physx.PhysxRigidStaticComponent) - .get_collision_shapes()[0] - ) - cg = cs.get_collision_groups() - cg[2] |= FETCH_UNIQUE_COLLISION_BIT - cs.set_collision_groups(cg) + # TODO (stao) (arth): is there a better way to model robots in sim. This feels very unintuitive. 
diff --git a/mani_skill2/utils/scene_builder/table/table_scene_builder.py b/mani_skill2/utils/scene_builder/table/table_scene_builder.py
index 000ed5d2d..45bcd0662 100644
--- a/mani_skill2/utils/scene_builder/table/table_scene_builder.py
+++ b/mani_skill2/utils/scene_builder/table/table_scene_builder.py
@@ -1,5 +1,4 @@
 import os.path as osp
-from dataclasses import dataclass
 from pathlib import Path
 from typing import List
 
@@ -9,11 +8,11 @@
 from transforms3d.euler import euler2quat
 
 from mani_skill2.agents.multi_agent import MultiAgent
+from mani_skill2.agents.robots.fetch import FETCH_UNIQUE_COLLISION_BIT
 from mani_skill2.utils.building.ground import build_ground
 from mani_skill2.utils.scene_builder import SceneBuilder
 
 
-@dataclass
 class TableSceneBuilder(SceneBuilder):
     robot_init_qpos_noise: float = 0.02
 
@@ -113,16 +112,14 @@ def initialize(self):
             self.env.agent.reset(qpos)
             self.env.agent.robot.set_pose(sapien.Pose([-1.05, 0, -self.table_height]))
 
-            from mani_skill2.agents.robots.fetch import FETCH_UNIQUE_COLLISION_BIT
-
-            cs = (
-                self.ground._objs[0]
-                .find_component_by_type(sapien.physx.PhysxRigidStaticComponent)
-                .get_collision_shapes()[0]
-            )
-            cg = cs.get_collision_groups()
-            cg[2] |= FETCH_UNIQUE_COLLISION_BIT
-            cs.set_collision_groups(cg)
+            # TODO (stao) (arth): is there a better way to model robots in sim? This feels very unintuitive.
+            for obj in self.ground._objs:
+                cs = obj.find_component_by_type(
+                    sapien.physx.PhysxRigidStaticComponent
+                ).get_collision_shapes()[0]
+                cg = cs.get_collision_groups()
+                cg[2] |= FETCH_UNIQUE_COLLISION_BIT
+                cs.set_collision_groups(cg)
         elif self.env.robot_uids == ("panda", "panda"):
             agent: MultiAgent = self.env.agent
             qpos = np.array(
diff --git a/mani_skill2/utils/structs/actor.py b/mani_skill2/utils/structs/actor.py
index c4754df80..bc52ba650 100644
--- a/mani_skill2/utils/structs/actor.py
+++ b/mani_skill2/utils/structs/actor.py
@@ -36,6 +36,9 @@ class Actor(PhysxRigidDynamicComponentStruct, BaseStruct[sapien.Entity]):
     _builder_initial_pose: sapien.Pose = None
     name: str = None
 
+    def __hash__(self):
+        return self._objs[0].__hash__()
+
     @classmethod
     def _create_from_entities(
         cls,
@@ -57,6 +60,11 @@ def _create_from_entities(
                 px_body_type = "kinematic"
             else:
                 px_body_type = "dynamic"
+        else:
+            bodies = [
+                ent.find_component_by_type(physx.PhysxRigidStaticComponent)
+                for ent in entities
+            ]
         return cls(
             _objs=entities,
             _scene=scene,
diff --git a/mani_skill2/utils/structs/articulation.py b/mani_skill2/utils/structs/articulation.py
index ee27a7f4c..8418b88c5 100644
--- a/mani_skill2/utils/structs/articulation.py
+++ b/mani_skill2/utils/structs/articulation.py
@@ -46,6 +46,8 @@ class Articulation(BaseStruct[physx.PhysxArticulation]):
     """Maps joint name to the Joint object"""
     active_joints: List[Joint]
     """List of active Joint objects, referencing elements in self.joints"""
+    active_joint_map: OrderedDict[str, Joint]
+    """Maps active joint name to the Joint object, referencing elements in self.joints"""
     name: str = None
     """Name of this articulation"""
 
@@ -93,6 +95,7 @@ def _create_from_physx_articulations(
             joints=[],
             joint_map=OrderedDict(),
             active_joints=[],
+            active_joint_map=OrderedDict(),
             name=shared_name,
             _merged=_merged,
         )
@@ -141,13 +144,19 @@ def _create_from_physx_articulations(
         joint_map = OrderedDict()
         wrapped_joints: List[Joint] = []
         for joint_index, joints in enumerate(all_joint_objs):
-            wrapped_joint = Joint.create(joints, self, joint_index)
+            try:
+                active_joint_index = all_active_joint_names.index(
+                    all_joint_names[joint_index]
+                )
+            except:
+                active_joint_index = None
+            wrapped_joint = Joint.create(joints, self, joint_index, active_joint_index)
             joint_map[wrapped_joint.name] = wrapped_joint
             wrapped_joints.append(wrapped_joint)
         self.joints = wrapped_joints
         self.joint_map = joint_map
         self.active_joints = [wrapped_joints[i] for i in active_joint_indices]
-
+        self.active_joint_map = {joint.name: joint for joint in self.active_joints}
         return self
 
     @classmethod
diff --git a/mani_skill2/utils/structs/joint.py b/mani_skill2/utils/structs/joint.py
index 04b53c491..b05d5b955 100644
--- a/mani_skill2/utils/structs/joint.py
+++ b/mani_skill2/utils/structs/joint.py
@@ -28,6 +28,9 @@ class Joint(BaseStruct[physx.PhysxArticulationJoint]):
     articulation: Articulation
 
     index: int
+    """index of this joint among all joints"""
+    active_index: int
+    """index of this joint among the active joints"""
     child_link: Link = None
     parent_link: Link = None
 
@@ -42,6 +45,7 @@ def create(
        physx_joints: List[physx.PhysxArticulationJoint],
        articulation: Articulation,
        joint_index: int,
+       active_joint_index: int,
    ):
        # naming convention of the original physx joints is "scene--_"
        shared_name = "_".join(
@@ -69,6 +73,7 @@
        return cls(
            articulation=articulation,
            index=joint_index,
+           active_index=active_joint_index,
            _objs=physx_joints,
            _scene=articulation._scene,
            _scene_mask=articulation._scene_mask,
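Together, `Articulation.active_joint_map` and `Joint.active_index` give a direct path from a joint name to its column in the batched `qpos`; the commented-out prints in `manualtest/scene_gpu.py` below use exactly this pattern. A hedged usage sketch (the joint name is Fetch-specific and taken from that test file):

    # assumes `robot` is a wrapped Articulation, e.g. env.agent.robot
    joint = robot.active_joint_map["r_gripper_finger_joint"]
    finger_qpos = robot.qpos[:, joint.active_index]  # one column per parallel env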
diff --git a/mani_skill2/utils/structs/types.py b/mani_skill2/utils/structs/types.py
index 788c1b1f7..fd3bb083c 100644
--- a/mani_skill2/utils/structs/types.py
+++ b/mani_skill2/utils/structs/types.py
@@ -23,7 +23,7 @@ class GPUMemoryConfig:
     temp_buffer_capacity: int = 2**24
     """Increase this if you get 'PxgPinnedHostLinearMemoryAllocator: overflowing initial allocation size, increase capacity to at least %.' """
-    max_rigid_contact_count: int = 524288
+    max_rigid_contact_count: int = 2**19
     max_rigid_patch_count: int = (
         2**18
     )  # 81920 is SAPIEN default but most tasks work with 2**18
diff --git a/mani_skill2/utils/wrappers/record.py b/mani_skill2/utils/wrappers/record.py
index 3a87ec5e4..afa6a4b73 100644
--- a/mani_skill2/utils/wrappers/record.py
+++ b/mani_skill2/utils/wrappers/record.py
@@ -214,13 +214,14 @@ def capture_image(self):
 
     def reset(
         self,
-        *,
+        *args,
         seed: Optional[Union[int, List[int]]] = None,
-        options: Optional[dict] = dict(),
+        options: Optional[dict] = None,
         **kwargs,
     ):
         skip_trajectory = False
-        options.pop("save_trajectory", False)
+        if options is not None:
+            options.pop("save_trajectory", False)
 
         if self.save_on_reset and self._episode_id >= 0 and not skip_trajectory:
             self.flush_trajectory(ignore_empty_transition=True)
@@ -235,7 +236,7 @@
         self._episode_id += 1
         reset_kwargs = copy.deepcopy(dict(seed=seed, options=options, **kwargs))
-        obs, info = super().reset(seed=seed, options=options, **kwargs)
+        obs, info = super().reset(*args, seed=seed, options=options, **kwargs)
 
         if self.save_trajectory:
             state = self._base_env.get_state()
diff --git a/manualtest/scene_gpu.py b/manualtest/scene_gpu.py
new file mode 100644
index 000000000..20e3d1e69
--- /dev/null
+++ b/manualtest/scene_gpu.py
@@ -0,0 +1,97 @@
+import gymnasium as gym
+import numpy as np
+import torch
+
+from mani_skill2.envs.scenes.base_env import SceneManipulationEnv
+from mani_skill2.utils.scene_builder.ai2thor import (
+    ArchitecTHORSceneBuilder,
+    ProcTHORSceneBuilder,
+    RoboTHORSceneBuilder,
+    iTHORSceneBuilder,
+)
+from mani_skill2.utils.scene_builder.replicacad.scene_builder import (
+    ReplicaCADSceneBuilder,
+)
+from mani_skill2.utils.wrappers import RecordEpisode
+
+if __name__ == "__main__":
+
+    render_mode = "human"
+    print("RENDER_MODE", render_mode)
+
+    env = gym.make(
+        "SceneManipulation-v1",
+        render_mode=render_mode,
+        robot_uids="fetch",
+        scene_builder_cls=ReplicaCADSceneBuilder,
+        num_envs=1,
+        # force_use_gpu_sim=True,
+        scene_idxs=3,
+        # obs_mode="rgbd"
+    )
+    obs, info = env.reset(seed=0)
+    # import matplotlib.pyplot as plt
+    # import ipdb;ipdb.set_trace()
+    viewer = env.render()
+    # base_env: SequentialTaskEnv = env.unwrapped
+    viewer.paused = True
+    viewer = env.render()
+    for i in range(10000):
+        action = np.zeros(env.action_space.shape)
+        # action[..., -7] = -1
+        if i < 60:
+            action[..., -3] = 1
+        elif i < 100:
+            action[..., -3] = 1
+            action[..., -1] = 0.3
+        elif i < 200:
+            action[..., -3] = 1
+            # action[..., -1] = .22
+        else:
+            action = env.action_space.sample()
+        if viewer.window.key_press("r"):
+            obs, info = env.reset()
+        #
+        env.step(action)
+        # print(
+        #     robot.qpos[:, robot.active_joint_map["r_gripper_finger_joint"].active_index]
+        # )
+        # print(
+        #     robot.qpos[:, robot.active_joint_map["l_gripper_finger_joint"].active_index]
+        # )
+
+        env.render()
+    # agent_obs = obs["agent"]
+    # extra_obs = obs["extra"]
+    # fetch_head_depth = obs["sensor_data"]["fetch_head"]["depth"]
+    # fetch_hand_depth = obs["sensor_data"]["fetch_hand"]["depth"]
+
+    # # fetch_head_depth = torch.Tensor(fetch_head_depth.astype(np.int16))
+    # # fetch_hand_depth = torch.Tensor(fetch_hand_depth.astype(np.int16))
+
+    # import matplotlib.pyplot as plt
+    # plt.imsave('out.png', np.concatenate([fetch_head_depth, fetch_hand_depth], axis=-2)[..., 0])
+
+    # env = RecordEpisode(env, '.', save_trajectory=False)
+    # env.reset(seed=0)
+
+    # # print force on robot
+    # robot_link_names = [link.name for link in env.agent.robot.links]
+    # print(robot_link_names)
+    # print(torch.norm(env.agent.robot.get_net_contact_forces(robot_link_names), dim=-1))
+
+    # for step_num in range(30):
+    #     action = np.zeros(env.action_space.shape)
+
+    #     # torso up
+    #     action[..., -4] = 1
+    #     # head still
+    #     action[..., -5] = 0
+    #     # gripper open
+    #     action[..., -7] = 1
+
+    #     # move forward and left
+    #     action[..., -3:-1] = 1
+
+    #     obs, rew, term, trunc, info = env.step(action=action)
+    # env.close()
diff --git a/manualtest/scene_manip_test.py b/manualtest/scene_manip_test.py
new file mode 100644
index 000000000..f6b56f95d
--- /dev/null
+++ b/manualtest/scene_manip_test.py
@@ -0,0 +1,96 @@
+from mani_skill2.envs.scenes.tasks import SequentialTaskEnv, PickSequentialTaskEnv
+from mani_skill2.envs.scenes.tasks.planner import PickSubtask, plan_data_from_file
+from mani_skill2.utils.scene_builder.ai2thor import (
+    ProcTHORSceneBuilder, ArchitecTHORSceneBuilder,
+    iTHORSceneBuilder, RoboTHORSceneBuilder,
+)
+from mani_skill2.envs.scenes.base_env import SceneManipulationEnv
+from mani_skill2.utils.wrappers import RecordEpisode
+
+
+import gymnasium as gym
+import numpy as np
+import torch
+
+import os
+
+render_mode = (
+    "rgb_array" if (
+        "SAPIEN_NO_DISPLAY" in os.environ
+        and int(os.environ["SAPIEN_NO_DISPLAY"]) == 1
+    ) else "human"
+)
+render_mode = "rgb_array"
+print("RENDER_MODE", render_mode)
+
+SCENE_IDX_TO_APPLE_PLAN = {
+    0: [PickSubtask(obj_id="objects/Apple_5_111")],
+    1: [PickSubtask(obj_id="objects/Apple_16_40")],
+    2: [PickSubtask(obj_id="objects/Apple_12_64")],
+    3: [PickSubtask(obj_id="objects/Apple_29_113")],
+    4: [PickSubtask(obj_id="objects/Apple_28_35")],
+    5: [PickSubtask(obj_id="objects/Apple_17_88")],
+    6: [PickSubtask(obj_id="objects/Apple_1_35")],
+    7: [PickSubtask(obj_id="objects/Apple_25_48")],
+    8: [PickSubtask(obj_id="objects/Apple_9_46")],
+    9: [PickSubtask(obj_id="objects/Apple_13_72")],
+}
+
+SCENE_IDX = 6
+env: SequentialTaskEnv = gym.make(
+    'SequentialTask-v0',
+    obs_mode='rgbd',
+    render_mode=render_mode,
+    control_mode='pd_joint_delta_pos',
+    reward_mode='dense',
+    robot_uids='fetch',
+    scene_builder_cls=ArchitecTHORSceneBuilder,
+    task_plans=[SCENE_IDX_TO_APPLE_PLAN[SCENE_IDX]],
+    scene_idxs=SCENE_IDX,
+    # num_envs=2,
+)
+
+print(env.unwrapped._init_raw_obs)
+
+# print(env.observation_space.keys())
+
+obs, info = env.reset(seed=0)
+
+# agent_obs = obs["agent"]
+# extra_obs = obs["extra"]
+# fetch_head_depth = obs["sensor_data"]["fetch_head"]["depth"]
+# fetch_hand_depth = obs["sensor_data"]["fetch_hand"]["depth"]
+
+# # fetch_head_depth = torch.Tensor(fetch_head_depth.astype(np.int16))
+# # fetch_hand_depth = torch.Tensor(fetch_hand_depth.astype(np.int16))
+
+# import matplotlib.pyplot as plt
+# plt.imsave('out.png', np.concatenate([fetch_head_depth, fetch_hand_depth], axis=-2)[..., 0])
+
+
+
+
+
+# env = RecordEpisode(env, '.', save_trajectory=False)
+# env.reset(seed=0)
+
+# # print force on robot
+# robot_link_names = [link.name for link in env.agent.robot.links]
+# print(robot_link_names)
+# print(torch.norm(env.agent.robot.get_net_contact_forces(robot_link_names), dim=-1))
+# for step_num in range(30):
+#     action = np.zeros(env.action_space.shape)
+
+#     # torso up
+#     action[..., -4] = 1
+#     # head still
+#     action[..., -5] = 0
+#     # gripper open
+#     action[..., -7] = 1
+
+#     # move forward and left
+#     action[..., -3:-1] = 1
+
+#     obs, rew, term, trunc, info = env.step(action=action)
+# env.close()
diff --git a/setup.py b/setup.py
index 65b682e14..4a50dd4ea 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,7 @@
         "imageio",
         "imageio[ffmpeg]",
         "pytorch_kinematics",
+        "huggingface_hub",  # we use HF to version control some assets/datasets more easily
     ],
     # Glob patterns do not automatically match dotfiles
     package_data={