diff --git a/.gitignore b/.gitignore
index fdea98447..6410f2b71 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,10 +31,11 @@ imgui.ini
# directories that should be ignored in general
# but can be excluded explicitly
-tmp/
-data/
-demos/
-videos/
+/tmp
+/data
+/demos
+/videos
+/examples/benchmarking/videos
# testing
.coverage*
diff --git a/README.md b/README.md
index 3e79054a9..e30808d7d 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ cd ManiSkill2 && pip install -e .
A GPU with the Vulkan driver installed is required to enable rendering in ManiSkill2. The rigid-body environments, powered by SAPIEN, are ready to use after installation. Test your installation:
```bash
-# Run an episode (at most 200 steps) of "PickCube-v0" (a rigid-body environment) with random actions
+# Run an episode (at most 200 steps) of "PickCube-v1" (a rigid-body environment) with random actions
# Or specify an environment by "-e ${ENV_ID}"
python -m mani_skill2.examples.demo_random_action
```
@@ -79,7 +79,7 @@ Here is a basic example of how to make an [Gym/Gymnasium](https://github.com/far
import gymnasium as gym
import mani_skill2.envs
-env = gym.make("PickCube-v0", obs_mode="rgbd", control_mode="pd_joint_delta_pos", render_mode="human")
+env = gym.make("PickCube-v1", obs_mode="rgbd", control_mode="pd_joint_delta_pos", render_mode="human")
print("Observation space", env.observation_space)
print("Action space", env.action_space)
diff --git a/docs/README.md b/docs/README.md
index 609516081..2dd6d9a9b 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -20,3 +20,6 @@ Start a server to watch changes
# In docs/
sphinx-autobuild ./source ./build/html
```
+
+
+For GitHub links, for the time being, double-check that they link to the right branch/commit.
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 6c1655605..3761a9ae9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,6 +1,6 @@
sphinx==6.2.1
sphinx-autobuild
-sphinx-book-theme
+pydata_sphinx_theme
# For spelling
sphinxcontrib.spelling
# Type hints support
@@ -9,4 +9,5 @@ sphinx-autodoc-typehints
sphinx_copybutton
# Markdown parser
myst-parser
-sphinx-subfigure
\ No newline at end of file
+sphinx-subfigure
+sphinxcontrib-video
\ No newline at end of file
diff --git a/docs/source/_static/videos/demo.mp4 b/docs/source/_static/videos/demo.mp4
new file mode 100644
index 000000000..588a20ae8
Binary files /dev/null and b/docs/source/_static/videos/demo.mp4 differ
diff --git a/docs/source/_static/videos/fetch_random_action_replica_cad_rt.mp4 b/docs/source/_static/videos/fetch_random_action_replica_cad_rt.mp4
new file mode 100644
index 000000000..61c81f63f
Binary files /dev/null and b/docs/source/_static/videos/fetch_random_action_replica_cad_rt.mp4 differ
diff --git a/docs/source/_static/videos/teleop-stackcube-demo.mp4 b/docs/source/_static/videos/teleop-stackcube-demo.mp4
new file mode 100644
index 000000000..d80ce56a0
Binary files /dev/null and b/docs/source/_static/videos/teleop-stackcube-demo.mp4 differ
diff --git a/docs/source/additional_resources/performance_benchmarking.md b/docs/source/additional_resources/performance_benchmarking.md
deleted file mode 100644
index b368d7c93..000000000
--- a/docs/source/additional_resources/performance_benchmarking.md
+++ /dev/null
@@ -1 +0,0 @@
-# Performance Benchmarking
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 48aff58a5..57cfbc931 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,3 +1,4 @@
+from mani_skill2 import __version__
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
@@ -6,11 +7,11 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
-project = "ManiSkill3"
-copyright = "2024, ManiSkill3 Contributors"
-author = "ManiSkill3 Contributors"
-release = "3.0.0"
-version = "3.0.0"
+project = "ManiSkill"
+copyright = "2024, ManiSkill Contributors"
+author = "ManiSkill Contributors"
+release = __version__
+version = __version__
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -22,6 +23,7 @@
"sphinx_copybutton",
"myst_parser",
"sphinx_subfigure",
+ "sphinxcontrib.video",
]
# https://myst-parser.readthedocs.io/en/latest/syntax/optional.html
@@ -36,12 +38,28 @@
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
-html_theme = "sphinx_book_theme"
+html_theme = "pydata_sphinx_theme"
+html_theme_options = {
+ "use_edit_page_button": True,
+ "icon_links": [
+ {
+ "name": "GitHub",
+ "url": "https://github.com/haosulab/maniskill2",
+ "icon": "fa-brands fa-github",
+ }
+ ],
+ "external_links": [
+ {"name": "Changelog", "url": "https://github.com/haosulab/maniskill2/releases"},
+ ]
+}
html_context = {
"display_github": True,
"github_user": "haosulab",
"github_repo": "ManiSkill2",
- "github_version": "main",
- "conf_py_path": "/source/"
-}
\ No newline at end of file
+ "github_version": "dev",
+ "conf_py_path": "/source/",
+ "doc_path": "docs/source"
+}
+
+html_static_path = ['_static']
diff --git a/docs/source/datasets/teleoperation.md b/docs/source/datasets/teleoperation.md
deleted file mode 100644
index 8cd4398cb..000000000
--- a/docs/source/datasets/teleoperation.md
+++ /dev/null
@@ -1 +0,0 @@
-# Teleoperation
\ No newline at end of file
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
deleted file mode 100644
index dfe8646f4..000000000
--- a/docs/source/getting_started/quickstart.md
+++ /dev/null
@@ -1,115 +0,0 @@
-# Quickstart
-
-## Gym Interface
-
-Here is a basic example of how to make a ManiSkill2 environment following the interface of [Gymnasium](https://gymnasium.farama.org/) and run a random policy.
-
-```python
-import gymnasium as gym
-import mani_skill2.envs
-
-env = gym.make(
- "PickCube-v0", # there are more tasks e.g. "PushCube-v0", "PegInsertionSide-v0, ...
- obs_mode="state", # there is also "state_dict", "rgbd", ...
- control_mode="pd_ee_delta_pose", # there is also "pd_joint_delta_pos", ...
- render_mode="human"
-)
-print("Observation space", env.observation_space)
-print("Action space", env.action_space)
-
-obs, _ = env.reset(seed=0) # reset with a seed for determinism
-done = False
-while not done:
- action = env.action_space.sample()
- obs, reward, terminated, truncated, info = env.step(action)
- done = terminated or truncated
- env.render() # a display is required to render
-env.close()
-```
-
-Each ManiSkill2 environment supports different **observation modes** and **control modes**, which determine its **observation space** and **action space**. They can be specified by `gym.make(env_id, obs_mode=..., control_mode=...)`.
-
-The common observation modes are `state`, `rgbd`, `pointcloud`. We also support `state_dict` (states organized as a hierarchical dictionary) and `image` (raw visual observations without postprocessing). Please refer to [Observation](../concepts/observation.md) for more details.
-
-We support a wide range of controllers. Different controllers can have different effects on your algorithms. Thus, it is recommended to understand the action space you are going to use. Please refer to [Controllers](../concepts/controllers.md) for more details.
-
-Some environments require **downloading assets**. You can download all the assets by `python -m mani_skill2.utils.download_asset all` or download task-specific assets by `python -m mani_skill2.utils.download_asset ${ENV_ID}`. The assets will be downloaded to `./data/` by default, and you can also use the environment variable `MS2_ASSET_DIR` to specify this destination. Please refer to [Environments](../concepts/environments.md) for all supported environments, and which environments require downloading assets.
-
-## Interactive Play
-
-TODO (stao): Add demo of teleoperation from camera
-
-We provide an example script to interactively play with our environments. A display is required.
-
-```bash
-# PickCube-v0 can be replaced with other environment id.
-python -m mani_skill2.examples.demo_manual_control -e PickCube-v0
-```
-
-Keyboard controls:
-
-- Press `i` (or `j`, `k`, `l`, `u`, `o`) to move the end-effector.
-- Press any key between `1` to `6` to rotate the end-effector.
-- Press `f` or `g` to open or close the gripper.
-- Press `w` (or `a`, `s`, `d`) to translate the base if the robot is mobile. Press `q` or `e` to rotate the base. Press `z` or `x` to lift the torso.
-- Press `esc` to close the viewer and exit the program.
-
-To enable an interactive viewer supported by SAPIEN, you can add `--enable-sapien-viewer`. The interactive SAPIEN viewer is more powerful for debugging (e.g., checking collision shapes, getting current poses). There will be two windows: an OpenCV window and a SAPIEN (GL) window. Pressing `0` on the focused window can switch the control to the other one.
-
-```{image} images/OpenCV-viewer.png
----
-height: 256px
-alt: OpenCV viewer
----
-```
-
-```{image} images/SAPIEN-viewer.png
----
-height: 256px
-alt: SAPIEN viewer
----
-```
-
-## GPU Parallelized/Vectorized Environments
-
-ManiSkill is powered by SAPIEN which supports GPU parallelized physics simulation and GPU parallelized rendering. This enables achieving 200,000+ state-based simulation FPS and 10,000+ FPS with rendering on a single 4090 GPU. For full benchmarking results see [this page](../additional_resources/performance_benchmarking)
-
-In order to run massively parallelized environments on a GPU, it is as simple as adding the `num_envs` argument to `gym.make` as so
-
-```python
-import gymnasium as gym
-import mani_skill2.envs
-
-env = gym.make("PickCube-v0", num_envs=1024)
-print(env.observation_space) # will now have shape (1024, ...)
-print(env.action_space) # will now have shape (1024, ...)
-```
-
-To benchmark the parallelized simulation, you can run
-
-```bash
-python -m mani_skill2.examples.benchmarking.gpu_sim --num-envs=1024
-```
-
-To try out the parallelized rendering, you can run
-
-```bash
-# rendering RGB + Depth data from all cameras
-python -m mani_skill2.examples.benchmarking.gpu_sim --num-envs=128 --obs-mode="rgbd"
-# directly save 128 videos of the visual observations put into one video
-python -m mani_skill2.examples.benchmarking.gpu_sim --num-envs=128 --save-video
-```
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 000000000..5a955db25
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,22 @@
+# ManiSkill
+
+ManiSkill is a feature-rich GPU-accelerated robotics benchmark built on top of [SAPIEN](https://github.com/haosulab/sapien) designed to provide accessible support for a wide array of applications from robot learning, learning from demonstrations, sim2real/real2sim, and more.
+
+Features:
+
+* GPU parallelized simulation enabling 200,000+ FPS on some tasks
+* GPU parallelized rendering enabling 10,000+ FPS on some tasks, massively outperforming other benchmarks
+* Flexible API to build custom tasks of any complexity
+* Variety of verified robotics tasks with diverse dynamics and visuals
+* Reproducible baselines in Reinforcement Learning and Learning from Demonstrations, spread across tasks from dextrous manipulation to mobile manipulation
+
+
+## User Guide
+
+A user guide on how to use ManiSkill with GPU parallelized simulation for your robotics and machine learning workflows
+```{toctree}
+:maxdepth: 2
+
+user_guide/index
+tasks/index
+```
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index 7da080166..000000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,48 +0,0 @@
-ManiSkill2
-=======================================
-
-ManiSkill is a feature-rich GPU-accelerated robotics benchmark built on top of SAPIEN_ designed to provide accessible support for a wide array of applications from robot learning, learning from demonstrations, sim2real/real2sim, and more.
-
-Features:
-
-* GPU parallelized simulation enabling 200,000+ FPS on some tasks
-* GPU parallelized rendering enabling 10,000+ FPS on some tasks, massively outperforming other benchmarks
-* Flexible API to build custom tasks of any complexity
-* Variety of verified robotics tasks with diverse dynamics and visuals
-* Reproducible baselines in Reinforcement Learning and Learning from Demonstrations, spread across tasks from dextrous manipulation to mobile manipulation
-
-
-.. toctree::
- :caption: Getting Started
- :maxdepth: 1
-
- getting_started/installation
- getting_started/quickstart
- getting_started/docker
-
-
-.. toctree::
- :maxdepth: 1
- :caption: Resources
-
- tutorials/index
- concepts/index
- datasets/index
- algorithms_and_models/index
- workflows/index
- benchmark/online_leaderboard
-
-.. toctree::
- :maxdepth: 2
- :caption: Additional Resources
-
- additional_resources/performance_benchmarking
- additional_resources/education
-
-.. Indices and tables
-.. ==================
-
-.. * :ref:`genindex`
-.. * :ref:`modindex`
-.. * :ref:`search`
-.. _SAPIEN: https://github.com/haosulab/sapien
\ No newline at end of file
diff --git a/docs/source/tasks/index.md b/docs/source/tasks/index.md
new file mode 100644
index 000000000..6ec576e51
--- /dev/null
+++ b/docs/source/tasks/index.md
@@ -0,0 +1 @@
+# Tasks
\ No newline at end of file
diff --git a/docs/source/tutorials/custom_environments.md b/docs/source/tutorials/custom_environments.md
deleted file mode 100644
index f5efd0ecd..000000000
--- a/docs/source/tutorials/custom_environments.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# Custom Environments/Tasks
-
-Building custom tasks in ManiSkill is straightforward and flexible. ManiSkill provides a number of features to help abstract away most of the GPU memory management required for parallel simulation and rendering.
-
-To build a custom environment/task in ManiSkill, it is comprised of the following core components
-
-1. Robot(s) and Assets
-2. Randomization
-3. Success/Failure Condition
-4. (Optional) Dense/shaped reward function
-5. (Optional) Setting up cameras/sensors for observations and rendering/recording
-
-This tutorial will first cover each of the core components, and then showcase 3 different tutorial tasks ([PushCube](#example-task-1-push-cube), [PickSingleYCB](#example-task-2-pick-single-ycb), [OpenCabinetDrawer](#example-task-3-open-cabinet-drawer)) that showcase how to use most of the features in ManiSkill.
-
-## Adding Robot(s) and Assets
-
-Loading these objects is done in the [`_load_actors`]() function.
-
-## Randomization
-
-Task initialization and randomization is handled in the [`_initalize_actors`]() function.
-
-## Success/Failure Conditions
-
-For each task, we need to determine if it has been completed successfully.
-
-
-## Advanced - Diverse objects/articulations
-
-TODO (stao)
-IDEAL API?
-V1
-
-call build_actor/build_articulation for each unique one and set mask and then build it.
-then call `merge_actors(actors: List[Actor]) -> Actor`... and it will just merge all actors? (a bit easier?)
-
-V2
-
-build entity yourself in each sub scene, then merge them all with Actor.create_from_entities(...) or something
-
-### Articulations
-
-```python
-def Articulation.merge_articulations(articulations: List[Articulation]) -> Articulation:
- ...
-```
-
-As articulations can all have different DOFs, different links entirely, not all properties of articulations can be used easily, need masking
-
-Shared:
-- root poses
-- bounding box
-
-Not shared
-- link children and parents
-
-
-## Example Task 1: Push Cube
-
-## Example Task 2: Pick Single YCB
-
-The goal of this example task is to demonstrate how to make task building with heterogenous object geometries easy via the actor merging API. Building tasks with heteroenous objects allows for easier diverse data collection and generaliable policy training. The complete task code is at [mani_skill2/envs/tasks/pick_single_ycb.py](https://github.com/haosulab/ManiSkill2/tree/main/mani_skill2/envs/tasks/pick_single_ycb.py)
-
-Previously in PushCube, we showed how one can simply create a single object like a cube, and ManiSkill will automatically spawn that cube in every sub-scene. To create a different object in each sub-scene, in this case a random object sampled from the YCB object Dataset, you must do this part yourself. As a user you simply write code to decide which sub-scene will have which object. This is done by creating an actor builder as usual, but now setting a scene mask to decide which sub-scenes have this object and which do not.
-
-```python
-for i, model_id in enumerate(model_ids):
- builder, obj_height = build_actor_ycb(
- model_id, self._scene, name=model_id, return_builder=True
- )
- scene_mask = np.zeros(self.num_envs, dtype=bool)
- scene_mask[i] = True
- builder.set_scene_mask(scene_mask)
- actors.append(builder.build(name=f"{model_id}-{i}"))
- self.obj_heights.append(obj_height)
-```
-
-The snippet above will now create a list of `Actor` objects, but this makes fetching data about these different actors complicated because you would have to loop over each one. Here you can now use the merge API shown below to simply merge all of these `Actor` objects in the `actors` list into one object that you can then fetch data shared across all objects like pose, linear velocity etc.
-
-```python
-self.obj = Actor.merge(actors, name="ycb_object")
-```
-
-
-## Example Task 3: Open Cabinet Drawer
\ No newline at end of file
diff --git a/docs/source/additional_resources/education.md b/docs/source/user_guide/additional_resources/education.md
similarity index 100%
rename from docs/source/additional_resources/education.md
rename to docs/source/user_guide/additional_resources/education.md
diff --git a/docs/source/user_guide/additional_resources/performance_benchmarking.md b/docs/source/user_guide/additional_resources/performance_benchmarking.md
new file mode 100644
index 000000000..875730d81
--- /dev/null
+++ b/docs/source/user_guide/additional_resources/performance_benchmarking.md
@@ -0,0 +1,28 @@
+# Performance Benchmarking
+
+
+## ManiSkill
+
+To benchmark ManiSkill + SAPIEN, after following the setup instructions on this repository's README.md, run
+
+```bash
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "PickCube-v1" -n=4096 -o=state --control-freq=50
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "PickCube-v1" -n=1536 -o=rgbd --control-freq=50
+# note: we use --control-freq=50 as this is the control frequency Isaac Sim-based repos tend to use
+```
+
+These are the expected state-based only results on a single 4090 GPU:
+```
+env.step: 277840.711 steps/s, 67.832 parallel steps/s, 100 steps in 1.474s
+env.step+env.reset: 239463.964 steps/s, 58.463 parallel steps/s, 1000 steps in 17.105s
+```
+
+These are the expected visual observations/rendering results on a single 4090 GPU:
+```
+env.step: 18549.002 steps/s, 12.076 parallel steps/s, 100 steps in 8.281s
+env.step+env.reset: 18146.848 steps/s, 11.814 parallel steps/s, 1000 steps in 84.643s
+```
+
+On 4090s the bottleneck is generally the memory available to spawn more cameras in parallel scenes. Results on high-memory GPUs will be published later.
+
+
\ No newline at end of file
diff --git a/docs/source/algorithms_and_models/baselines.md b/docs/source/user_guide/algorithms_and_models/baselines.md
similarity index 94%
rename from docs/source/algorithms_and_models/baselines.md
rename to docs/source/user_guide/algorithms_and_models/baselines.md
index 4e3373fdd..cff463774 100644
--- a/docs/source/algorithms_and_models/baselines.md
+++ b/docs/source/user_guide/algorithms_and_models/baselines.md
@@ -7,7 +7,7 @@ ManiSkill has a number of baseline Reinforcement Learning (RL), Learning from De
## Offline Only Methods
-These are algorithms that do not use online interaction with the environment to be trained and only learn from demonstration data.
+These are algorithms that do not use online interaction with the task during training and only learn from demonstration data.
| Baseline | Source | Results |
@@ -18,7 +18,7 @@ These are algorithms that do not use online interaction with the environment to
## Online Only Methods
-These are online only algorithms that do not learn from demonstrations and optimize based on feedback from interacting with the environment. These methods also benefit from GPU simulation which can massively accelerate training time
+These are online-only algorithms that do not learn from demonstrations and optimize based on feedback from interacting with the task. These methods also benefit from GPU simulation, which can massively accelerate training time.
| Baseline | Source | Results |
| ---------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | --------------------- |
@@ -28,7 +28,7 @@ These are online only algorithms that do not learn from demonstrations and optim
## Offline + Online Methods
-These are baselines that can train on offline demonstration data as well as use online data collected from interacting with an environment.
+These are baselines that can train on offline demonstration data as well as use online data collected from interacting with a task.
| Baseline | Source | Results |
| ----------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------- |
diff --git a/docs/source/algorithms_and_models/index.md b/docs/source/user_guide/algorithms_and_models/index.md
similarity index 100%
rename from docs/source/algorithms_and_models/index.md
rename to docs/source/user_guide/algorithms_and_models/index.md
diff --git a/docs/source/benchmark/online_leaderboard.md b/docs/source/user_guide/benchmark/online_leaderboard.md
similarity index 98%
rename from docs/source/benchmark/online_leaderboard.md
rename to docs/source/user_guide/benchmark/online_leaderboard.md
index da30ab699..4284a628e 100644
--- a/docs/source/benchmark/online_leaderboard.md
+++ b/docs/source/user_guide/benchmark/online_leaderboard.md
@@ -25,7 +25,7 @@ export PYTHONPATH=${PATH_TO_YOUR_CODES_IN_HOST}:$PYTHONPATH
# python -c "from user_solution import UserPolicy"
# Run evaluation. The result will be saved to ${OUTPUT_DIR}.
-ENV_ID="PickCube-v0" OUTPUT_DIR="tmp" NUM_EPISODES=1
+ENV_ID="PickCube-v1" OUTPUT_DIR="tmp" NUM_EPISODES=1
python -m mani_skill2.evaluation.run_evaluation -e ${ENV_ID} -o ${OUTPUT_DIR} -n ${NUM_EPISODES}
```
@@ -85,7 +85,7 @@ docker run -d --rm --gpus all --name ${CONTAINER_NAME} \
# Interactive debug
docker exec -it ${CONTAINER_NAME} /bin/bash
# Or run evaluation
-docker exec -it ${CONTAINER_NAME} /bin/bash -c "export MS2_ASSET_DIR=/data; python -m mani_skill2.evaluation.run_evaluation -e PickCube-v0 -o /eval_results/PickCube-v0 -n 1"
+docker exec -it ${CONTAINER_NAME} /bin/bash -c "export MS2_ASSET_DIR=/data; python -m mani_skill2.evaluation.run_evaluation -e PickCube-v1 -o /eval_results/PickCube-v1 -n 1"
# Finally, you can delete the container
docker kill ${CONTAINER_NAME}
```
diff --git a/docs/source/concepts/controllers.md b/docs/source/user_guide/concepts/controllers.md
similarity index 88%
rename from docs/source/concepts/controllers.md
rename to docs/source/user_guide/concepts/controllers.md
index ef946d09f..b48d48c3b 100644
--- a/docs/source/concepts/controllers.md
+++ b/docs/source/user_guide/concepts/controllers.md
@@ -2,7 +2,9 @@
Controllers are interfaces between policies and robots. The policy outputs actions to the controller, and the controller converts actions to control signals to the robot. For example, the `arm_pd_ee_delta_pose` controller takes the relative movement of the end-effector as input, and uses [inverse kinematics](https://en.wikipedia.org/wiki/Inverse_kinematics) to convert input actions to target positions of robot joints. The robot uses a [PD controller](https://en.wikipedia.org/wiki/PID_controller) to drive motors to achieve target joint positions.
-**The controller defines the action space of an environment.** The robot can have separate controllers for its arm, gripper, and other components. The action space is a concatenation of the action spaces of all controllers.
+**The controller defines the action space of a task.** The robot can have separate controllers for its arm, gripper, and other components. The action space is a concatenation of the action spaces of all controllers.
+
+Note that while `pd_ee_delta_pose`-type controllers that use IK may be more sample-efficient to train / learn from in RL workflows, running these controllers in GPU simulation is not that fast and may slow down RL training.
## Terminology
@@ -46,10 +48,10 @@ For simplicity, we use the name of the arm controller to represent each combinat
- gripper_pd_joint_pos (1-dim): Note that we force two gripper fingers to have the same target position. Thus, it is like a "mimic" joint.
-## Mobile Manipulator
+
diff --git a/docs/source/concepts/environments.md b/docs/source/user_guide/concepts/environments.md
similarity index 99%
rename from docs/source/concepts/environments.md
rename to docs/source/user_guide/concepts/environments.md
index 9744ed14e..92cd33934 100644
--- a/docs/source/concepts/environments.md
+++ b/docs/source/user_guide/concepts/environments.md
@@ -1,4 +1,4 @@
-# Environments
+# Tasks
[asset-badge]: https://img.shields.io/badge/download%20asset-yes-blue.svg
diff --git a/docs/source/concepts/index.md b/docs/source/user_guide/concepts/index.md
similarity index 61%
rename from docs/source/concepts/index.md
rename to docs/source/user_guide/concepts/index.md
index 136578a0a..da784f398 100644
--- a/docs/source/concepts/index.md
+++ b/docs/source/user_guide/concepts/index.md
@@ -1,7 +1,7 @@
# Concepts
```{toctree}
:titlesonly:
-:glob:
-*
+controllers
+observation
```
\ No newline at end of file
diff --git a/docs/source/concepts/observation.md b/docs/source/user_guide/concepts/observation.md
similarity index 96%
rename from docs/source/concepts/observation.md
rename to docs/source/user_guide/concepts/observation.md
index 17f815108..49174a2da 100644
--- a/docs/source/concepts/observation.md
+++ b/docs/source/user_guide/concepts/observation.md
@@ -5,14 +5,14 @@ See our [colab tutorial](https://colab.research.google.com/github/haosulab/ManiS
## Observation mode
**The observation mode defines the observation space.**
-All ManiSkill2 environments take the observation mode (`obs_mode`) as one of the input arguments of `__init__`.
+All ManiSkill2 tasks take the observation mode (`obs_mode`) as one of the input arguments of `__init__`.
In general, the observation is organized as a dictionary (with an observation space of `gym.spaces.Dict`).
There are two raw observations modes: `state_dict` (privileged states) and `image` (raw visual observations without postprocessing). `state` is a flat version of `state_dict`. `rgbd` and `pointcloud` apply post-processing on `image`.
### state_dict
-The observation is a dictionary of states. It usually contains privileged information such as object poses. It is not supported for soft-body environments.
+The observation is a dictionary of states. It usually contains privileged information such as object poses. It is not supported for soft-body tasks.
- `agent`: robot proprioception
- `qpos`: [nq], current joint positions. *nq* is the degree of freedom.
@@ -38,7 +38,7 @@ In addition to `agent` and `extra`, `image` and `camera_param` are introduced.
- `extrinsic_cv`: [4, 4], camera extrinsic (OpenCV convention)
- `intrinsic_cv`: [3, 3], camera intrinsic (OpenCV convention)
-Unless specified otherwise, there is usually at least one camera called the *base_camera* (fixed relative to the robot base). Some robots have additional sensor configurations that add more cameras such as a *hand_camera* mounted on the robot hand. Environments migrated from ManiSkill1 use 3 cameras mounted above the robot: *overhead_camera_{i}*.
+Unless specified otherwise, there is usually at least one camera called the *base_camera* (fixed relative to the robot base). Some robots have additional sensor configurations that add more cameras such as a *hand_camera* mounted on the robot hand. Tasks migrated from ManiSkill1 use 3 cameras mounted above the robot: *overhead_camera_{i}*.
### rgbd
@@ -93,7 +93,7 @@ For `obs_mode="pointcloud"`:
### More Details on Mesh and Actor-Level segmentations
-An "actor" is a fundamental object that represents a physical entity (rigid body) that can be simulated in SAPIEN (the backend of ManiSkill2). An articulated object is a collection of links interconnected by joints, and each link is also an actor. In SAPIEN, `scene.get_all_actors()` will return all the actors that are not links of articulated objects. The examples are the ground, the cube in [PickCube](./environments.md#pickcube-v0), and the YCB objects in [PickSingleYCB](./environments.md#picksingleycb-v0). `scene.get_all_articulations()` will return all the articulations. The examples are the robots, the cabinets in [OpenCabinetDoor](./environments.md#opencabinetdoor-v1), and the chairs in [PushChair](./environments.md#pushchair-v1). Below is an example of how to get actors and articulations in SAPIEN.
+An "actor" is a fundamental object that represents a physical entity (rigid body) that can be simulated in SAPIEN (the backend of ManiSkill2). An articulated object is a collection of links interconnected by joints, and each link is also an actor. In SAPIEN, `scene.get_all_actors()` will return all the actors that are not links of articulated objects. The examples are the ground, the cube in [PickCube](./environments.md#PickCube-v1), and the YCB objects in [PickSingleYCB](./environments.md#picksingleycb-v0). `scene.get_all_articulations()` will return all the articulations. The examples are the robots, the cabinets in [OpenCabinetDoor](./environments.md#opencabinetdoor-v1), and the chairs in [PushChair](./environments.md#pushchair-v1). Below is an example of how to get actors and articulations in SAPIEN.
```python
import sapien
diff --git a/docs/source/concepts/thumbnails/AssemblingKits-v0.gif b/docs/source/user_guide/concepts/thumbnails/AssemblingKits-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/AssemblingKits-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/AssemblingKits-v0.gif
diff --git a/docs/source/concepts/thumbnails/Excavate-v0.gif b/docs/source/user_guide/concepts/thumbnails/Excavate-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Excavate-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Excavate-v0.gif
diff --git a/docs/source/concepts/thumbnails/Fill-v0.gif b/docs/source/user_guide/concepts/thumbnails/Fill-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Fill-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Fill-v0.gif
diff --git a/docs/source/concepts/thumbnails/Hang-v0.gif b/docs/source/user_guide/concepts/thumbnails/Hang-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Hang-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Hang-v0.gif
diff --git a/docs/source/concepts/thumbnails/MoveBucket-v1.gif b/docs/source/user_guide/concepts/thumbnails/MoveBucket-v1.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/MoveBucket-v1.gif
rename to docs/source/user_guide/concepts/thumbnails/MoveBucket-v1.gif
diff --git a/docs/source/concepts/thumbnails/OpenCabinetDoor-v1.gif b/docs/source/user_guide/concepts/thumbnails/OpenCabinetDoor-v1.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/OpenCabinetDoor-v1.gif
rename to docs/source/user_guide/concepts/thumbnails/OpenCabinetDoor-v1.gif
diff --git a/docs/source/concepts/thumbnails/OpenCabinetDrawer-v1.gif b/docs/source/user_guide/concepts/thumbnails/OpenCabinetDrawer-v1.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/OpenCabinetDrawer-v1.gif
rename to docs/source/user_guide/concepts/thumbnails/OpenCabinetDrawer-v1.gif
diff --git a/docs/source/concepts/thumbnails/PandaAvoidObstacles-v0.gif b/docs/source/user_guide/concepts/thumbnails/PandaAvoidObstacles-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PandaAvoidObstacles-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PandaAvoidObstacles-v0.gif
diff --git a/docs/source/concepts/thumbnails/PegInsertionSide-v0.gif b/docs/source/user_guide/concepts/thumbnails/PegInsertionSide-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PegInsertionSide-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PegInsertionSide-v0.gif
diff --git a/docs/source/concepts/thumbnails/PickClutterYCB-v0.gif b/docs/source/user_guide/concepts/thumbnails/PickClutterYCB-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PickClutterYCB-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PickClutterYCB-v0.gif
diff --git a/docs/source/concepts/thumbnails/PickCube-v0.gif b/docs/source/user_guide/concepts/thumbnails/PickCube-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PickCube-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PickCube-v0.gif
diff --git a/docs/source/concepts/thumbnails/PickSingleEGAD-v0.gif b/docs/source/user_guide/concepts/thumbnails/PickSingleEGAD-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PickSingleEGAD-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PickSingleEGAD-v0.gif
diff --git a/docs/source/concepts/thumbnails/PickSingleYCB-v0.gif b/docs/source/user_guide/concepts/thumbnails/PickSingleYCB-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PickSingleYCB-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PickSingleYCB-v0.gif
diff --git a/docs/source/concepts/thumbnails/Pinch-v0.gif b/docs/source/user_guide/concepts/thumbnails/Pinch-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Pinch-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Pinch-v0.gif
diff --git a/docs/source/concepts/thumbnails/PlugCharger-v0.gif b/docs/source/user_guide/concepts/thumbnails/PlugCharger-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PlugCharger-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/PlugCharger-v0.gif
diff --git a/docs/source/concepts/thumbnails/Pour-v0.gif b/docs/source/user_guide/concepts/thumbnails/Pour-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Pour-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Pour-v0.gif
diff --git a/docs/source/concepts/thumbnails/PushChair-v1.gif b/docs/source/user_guide/concepts/thumbnails/PushChair-v1.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/PushChair-v1.gif
rename to docs/source/user_guide/concepts/thumbnails/PushChair-v1.gif
diff --git a/docs/source/concepts/thumbnails/StackCube-v0.gif b/docs/source/user_guide/concepts/thumbnails/StackCube-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/StackCube-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/StackCube-v0.gif
diff --git a/docs/source/concepts/thumbnails/TurnFaucet-v0.gif b/docs/source/user_guide/concepts/thumbnails/TurnFaucet-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/TurnFaucet-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/TurnFaucet-v0.gif
diff --git a/docs/source/concepts/thumbnails/Write-v0.gif b/docs/source/user_guide/concepts/thumbnails/Write-v0.gif
similarity index 100%
rename from docs/source/concepts/thumbnails/Write-v0.gif
rename to docs/source/user_guide/concepts/thumbnails/Write-v0.gif
diff --git a/docs/source/user_guide/data_collection/index.md b/docs/source/user_guide/data_collection/index.md
new file mode 100644
index 000000000..7cc9165d3
--- /dev/null
+++ b/docs/source/user_guide/data_collection/index.md
@@ -0,0 +1,10 @@
+# Data Collection
+
+ManiSkill provides a number of tools to collect demonstration data, ranging from programmed motion planning solutions to a variety of teleoperation systems addressing different types of tasks.
+
+```{toctree}
+:titlesonly:
+:glob:
+
+*
+```
\ No newline at end of file
diff --git a/docs/source/user_guide/data_collection/motionplanning.md b/docs/source/user_guide/data_collection/motionplanning.md
new file mode 100644
index 000000000..ed9007e2b
--- /dev/null
+++ b/docs/source/user_guide/data_collection/motionplanning.md
@@ -0,0 +1,6 @@
+# Motion Planning
+
+ManiSkill provides simple tools to use motion planning to generate robot trajectories, primarily via the open-source [mplib](https://github.com/haosulab/MPlib) library. If you install ManiSkill, mplib comes installed already, so no extra installation is necessary.
+
+For an in-depth tutorial on how to use more advanced features of mplib, check out its documentation here: https://motion-planning-lib.readthedocs.io/latest/. Otherwise, this section covers some example code you can use and modify to generate motion-planned demonstrations. The example code here is written for the Panda arm but should be modifiable to work for other robots.
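+
+As a rough sketch of what such a script can start from (hedged: the `mplib.Planner` arguments, the `plan_screw` call, and the result keys below follow mplib's examples but should be double-checked against the mplib documentation linked above; the URDF/SRDF paths and joint values are placeholders):
+
+```python
+import numpy as np
+import mplib
+
+# placeholder paths; point these at the robot description you are actually using
+planner = mplib.Planner(
+    urdf="path/to/panda.urdf",
+    srdf="path/to/panda.srdf",
+    move_group="panda_hand",
+)
+
+current_qpos = np.zeros(9)  # placeholder: 7 arm joints + 2 gripper joints for the Panda
+# target pose as [x, y, z, qw, qx, qy, qz]
+target_pose = np.array([0.4, 0.0, 0.2, 0.0, 1.0, 0.0, 0.0])
+
+result = planner.plan_screw(target_pose, current_qpos, time_step=1 / 250)
+if result["status"] == "Success":
+    waypoints = result["position"]  # joint-position waypoints, e.g. for a pd_joint_pos controller
+```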
+
diff --git a/docs/source/user_guide/data_collection/teleoperation.md b/docs/source/user_guide/data_collection/teleoperation.md
new file mode 100644
index 000000000..832f6164f
--- /dev/null
+++ b/docs/source/user_guide/data_collection/teleoperation.md
@@ -0,0 +1,40 @@
+# Teleoperation
+
+There are a number of teleoperation systems provided by ManiSkill that help collect demonstration data for tasks. Each system is detailed below with how to use it and a demo video. We also detail the hardware required, how usable the system is, and the limitations of the system.
+
+At the moment there is the intuitive click+drag system; systems using e.g. a space mouse or a VR headset will come soon.
+
+## Click+Drag System
+
+Requirements: Display, mouse, keyboard
+
+Usability: Extremely easy to generate fine-grained demonstrations
+
+Limitations: Limited to solving less dynamic tasks with two-finger grippers, like picking up a cube. Tasks like throwing a cube would not be possible.
+
+To start the system you can specify a task ID with `-e` and run
+```bash
+python -m mani_skill2.examples.teleoperation.interactive_panda -e "StackCube-v1"
+```
+
+
+
+You can then drag the end-effector of the robot arm to any position and rotation and press "n" on the keyboard to generate a trajectory to that pose (done via motion planning). Each time, the system also prints whether the task is currently solved or not.
+
+You can press "g" to toggle the gripper to be closing or opening.
+
+To finish collecting one trajectory and to move on to another, simply press "c" which will save the last trajectory.
+
+To stop data collection, press "q" to quit. This will save the trajectory data to your `demos/teleop/` folder. In addition, it will generate videos of your demos afterwards and put them in the same folder; you can skip this step by pressing CTRL+C to stop the script.
+
+You can always press "h" to bring up a help menu describing the keyboard commands.
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/source/datasets/datasets.md b/docs/source/user_guide/datasets/datasets.md
similarity index 57%
rename from docs/source/datasets/datasets.md
rename to docs/source/user_guide/datasets/datasets.md
index b3f738667..dfbbde85c 100644
--- a/docs/source/datasets/datasets.md
+++ b/docs/source/user_guide/datasets/datasets.md
@@ -4,7 +4,7 @@ ManiSkill has a wide variety of demonstrations from different sources including
## Download
-We provide a command line tool to download demonstrations directly from our [Hugging Face 🤗 dataset page](https://huggingface.co/datasets/haosulab/ManiSkill2) which are done by environment ID. The tool will download the demonstration files to a folder and also a few demonstration videos visualizing what the demonstrations look like. See [Environments](../concepts/environments.md) for a list of all supported environments.
+We provide a command line tool to download demonstrations, organized by task ID, directly from our [Hugging Face 🤗 dataset page](https://huggingface.co/datasets/haosulab/ManiSkill2). The tool will download the demonstration files to a folder, along with a few demonstration videos visualizing what the demonstrations look like. See [Tasks](../concepts/environments.md) for a list of all supported tasks.
@@ -16,13 +16,11 @@ python -m mani_skill2.utils.download_demo all
python -m mani_skill2.utils.download_demo ${ENV_ID}
# Download the demonstration datasets for all rigid-body tasks to "./demos"
python -m mani_skill2.utils.download_demo rigid_body -o ./demos
-# Download the demonstration datasets for all soft-body tasks
-python -m mani_skill2.utils.download_demo soft_body
```
## Format
-All demonstrations for an environment are saved in the HDF5 format openable by [h5py](https://github.com/h5py/h5py). Each HDF5 dataset is named `trajectory.{obs_mode}.{control_mode}.h5`, and is associated with a JSON metadata file with the same base name. Unless otherwise specified, `trajectory.h5` is short for `trajectory.none.pd_joint_pos.h5`, which contains the original demonstrations generated by the `pd_joint_pos` controller with the `none` observation mode (empty observations). However, there may exist demonstrations generated by other controllers. **Thus, please check the associated JSON to ensure which controller is used.**
+All demonstrations for a task are saved in the HDF5 format openable by [h5py](https://github.com/h5py/h5py). Each HDF5 dataset is named `trajectory.{obs_mode}.{control_mode}.h5`, and is associated with a JSON metadata file with the same base name. Unless otherwise specified, `trajectory.h5` is short for `trajectory.none.pd_joint_pos.h5`, which contains the original demonstrations generated by the `pd_joint_pos` controller with the `none` observation mode (empty observations). However, there may exist demonstrations generated by other controllers. **Thus, please check the associated JSON to ensure which controller is used.**
+
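+A quick way to inspect a downloaded file pair (a hedged sketch: the local path below is hypothetical, and the exact JSON fields should be checked against the file you download):
+
+```python
+import json
+
+import h5py
+
+h5_path = "demos/PickCube-v1/trajectory.h5"   # hypothetical local path
+json_path = h5_path.replace(".h5", ".json")   # metadata file with the same base name
+
+with h5py.File(h5_path, "r") as f:
+    print(list(f.keys()))                     # one group per recorded trajectory
+
+with open(json_path) as f:
+    meta = json.load(f)
+print(meta.keys())                            # check here which controller / obs mode was used
+```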
diff --git a/docs/source/datasets/index.md b/docs/source/user_guide/datasets/index.md
similarity index 100%
rename from docs/source/datasets/index.md
rename to docs/source/user_guide/datasets/index.md
diff --git a/docs/source/user_guide/demos/index.md b/docs/source/user_guide/demos/index.md
new file mode 100644
index 000000000..60907e997
--- /dev/null
+++ b/docs/source/user_guide/demos/index.md
@@ -0,0 +1,105 @@
+# ManiSkill Demos
+
+There are a number of quick scripts you can run to test and demonstrate various features of ManiSkill.
+
+## Demo Random Actions
+The quickest demo to get started with is the random-actions demo.
+Run
+```bash
+python -m mani_skill2.examples.demo_random_action -h
+```
+for a full list of available commands.
+
+Some recommended examples that cover a number of features of ManiSkill:
+
+Tasks in Realistic Scenes (ReplicaCAD dataset example)
+```bash
+python -m mani_skill2.utils.download_asset "ReplicaCAD"
+python -m mani_skill2.examples.demo_random_action -e "ReplicaCAD_SceneManipulation-v1" \
+ --render-mode="rgb_array" --record-dir="videos" # run headless and save video
+python -m mani_skill2.examples.demo_random_action -e "ReplicaCAD_SceneManipulation-v1" \
+ --render-mode="human" # run with GUI
+```
+
+To turn ray-tracing on for more photo-realistic rendering, you can add `--shader="rt"` or `--shader="rt-fast"`
+
+```bash
+python -m mani_skill2.examples.demo_random_action -e "ReplicaCAD_SceneManipulation-v1" \
+ --render-mode="human" --shader="rt-fast" # faster ray-tracing option but lower quality
+```
+
+
+
+Tasks with multiple robots
+```bash
+python -m mani_skill2.examples.demo_random_action -e "TwoRobotStackCube-v1" \
+ --render-mode="human"
+```
+
+Tasks with a dextrous hand
+```bash
+python -m mani_skill2.examples.demo_random_action -e "RotateValveLevel2-v1" \
+ --render-mode="human"
+```
+
+
+Tasks with simulated tactile sensing
+```bash
+python -m mani_skill2.examples.demo_random_action -e "RotateSingleObjectInHandLevel3-v1" \
+ --render-mode="human"
+```
+This task also uses a feature unique to ManiSkill/SAPIEN where you can retrieve object-pair contact impulses/forces, in addition to object net contact forces.
+
+To quickly demo tasks that support simulating different objects and articulations (with different DOFs) across parallel environments, see the [GPU Simulation section](#gpu-simulation).
+
+
+
+## GPU Simulation
+
+To benchmark the GPU simulation on the PickCube-v1 task with 4096 parallel environments, you can run
+```bash
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "PickCube-v1" -n 4096
+```
+
+To save videos of the visual observations the agent would get (in this case just RGB and depth), you can run
+```bash
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "PickCube-v1" -n 64 --save-video --render-mode="sensors"
+```
+it should run quite fast! (3000+ FPS on a 4090; you can increase the number of envs for higher FPS)
+
+To try out the heterogeneous object simulation features you can run
+```bash
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "PickSingleYCB-v1" -n 64 --save-video --render-mode="sensors"
+python -m mani_skill2.examples.benchmarking.gpu_sim -e "RotateValveLevel2-v1" -n 64 --save-video --render-mode="sensors"
+```
+which shows two tasks that have different objects and articulations in every parallel environment.
+
+
+
+More details and performance benchmarking results can be found on [this page](../additional_resources/performance_benchmarking.md)
+
+## Interactive Control
+
+Click+Drag Teleoperation:
+
+Simple tool where you can click and drag the end-effector of the Panda robot arm to solve various tasks. You just click+drag, press "n" to move to the position you dragged to, "g" to toggle on/off grasping, and repeat. Press "q" to quit and save a video of the result.
+
+```bash
+python -m mani_skill2.examples.teleoperation.interactive_panda -e "StackCube-v1"
+```
+
+See the [teleoperation page](../data_collection/teleoperation.md#clickdrag-system) for more details about how to use this tool (for demos and data collection). The video below shows the system running.
+
+
+
+## Motion Planning Solutions
\ No newline at end of file
diff --git a/docs/source/user_guide/env_sample.png b/docs/source/user_guide/env_sample.png
new file mode 100644
index 000000000..826c189a9
Binary files /dev/null and b/docs/source/user_guide/env_sample.png differ
diff --git a/docs/source/getting_started/docker.md b/docs/source/user_guide/getting_started/docker.md
similarity index 98%
rename from docs/source/getting_started/docker.md
rename to docs/source/user_guide/getting_started/docker.md
index 08309adc2..5358d5468 100644
--- a/docs/source/getting_started/docker.md
+++ b/docs/source/user_guide/getting_started/docker.md
@@ -19,7 +19,7 @@ We provide a docker image (`haosulab/mani-skill2`) and its corresponding [Docker
docker pull haosulab/mani-skill2
docker run --rm -it --gpus all haosulab/mani-skill2 python -m mani_skill2.examples.demo_random_action
```
-
+
---
diff --git a/docs/source/getting_started/images/OpenCV-viewer.png b/docs/source/user_guide/getting_started/images/OpenCV-viewer.png
similarity index 100%
rename from docs/source/getting_started/images/OpenCV-viewer.png
rename to docs/source/user_guide/getting_started/images/OpenCV-viewer.png
diff --git a/docs/source/getting_started/images/SAPIEN-viewer.png b/docs/source/user_guide/getting_started/images/SAPIEN-viewer.png
similarity index 100%
rename from docs/source/getting_started/images/SAPIEN-viewer.png
rename to docs/source/user_guide/getting_started/images/SAPIEN-viewer.png
diff --git a/docs/source/user_guide/getting_started/images/demo_random_action_gui.png b/docs/source/user_guide/getting_started/images/demo_random_action_gui.png
new file mode 100644
index 000000000..c9550a1b2
Binary files /dev/null and b/docs/source/user_guide/getting_started/images/demo_random_action_gui.png differ
diff --git a/docs/source/user_guide/getting_started/images/replica_cad_interactive_rt.png b/docs/source/user_guide/getting_started/images/replica_cad_interactive_rt.png
new file mode 100644
index 000000000..93109d682
Binary files /dev/null and b/docs/source/user_guide/getting_started/images/replica_cad_interactive_rt.png differ
diff --git a/docs/source/getting_started/installation.md b/docs/source/user_guide/getting_started/installation.md
similarity index 72%
rename from docs/source/getting_started/installation.md
rename to docs/source/user_guide/getting_started/installation.md
index 2ddb7b374..389c7c0ae 100644
--- a/docs/source/getting_started/installation.md
+++ b/docs/source/user_guide/getting_started/installation.md
@@ -1,48 +1,49 @@
# Installation
-ManiSkill is a GPU-accelerated robotics benchmark built on top of [SAPIEN](https://github.com/haosulab/sapien) designed to support a wide array of applications from robot learning, learning from demonstrations, sim2real/real2sim, and more. Follow the instructions below to get started using ManiSkill.
+Installing ManiSkill is quite simple: a single pip install, plus installing Vulkan if you don't have it already.
From pip (stable version):
```bash
-# `mani-skill2` or `mani_skill2` is equivalent for pip
-pip install mani-skill2
+# this is currently a beta version of mani_skill with GPU simulation
+pip install mani-skill2==3.0.0.dev0
```
+
From github (latest commit):
```bash
-pip install --upgrade git+https://github.com/haosulab/ManiSkill2.git
+pip install --upgrade git+https://github.com/haosulab/ManiSkill2.git@dev
```
From source:
```bash
git clone https://github.com/haosulab/ManiSkill2.git
-cd ManiSkill2 && pip install -e .
+cd ManiSkill2 && git checkout -b dev --track origin/dev && pip install -e .
```
:::{note}
While state-based simulation does not require any additional dependencies, a GPU with the Vulkan driver installed is required to enable rendering in ManiSkill. See [here](#vulkan) for how to install and configure Vulkan on Ubuntu.
:::
-The rigid-body environments, powered by SAPIEN, are ready to use after installation. Test your installation:
+The rigid-body tasks, powered by SAPIEN, are ready to use after installation. Test your installation:
```bash
-# Run an episode (at most 200 steps) of "PickCube-v0" (a rigid-body environment) with random actions
-# Or specify an environment by "-e ${ENV_ID}"
+# Run an episode (at most 50 steps) of "PickCube-v1" (a rigid-body task) with random actions
+# Or specify a task by "-e ${ENV_ID}"
python -m mani_skill2.examples.demo_random_action
```
A docker image is also provided on [Docker Hub](https://hub.docker.com/repository/docker/haosulab/mani-skill2/general) called `haosulab/mani-skill2` and its corresponding [Dockerfile](https://github.com/haosulab/ManiSkill2/blob/main/docker/Dockerfile).
## Troubleshooting
@@ -170,7 +171,7 @@ The following errors can happen if the Vulkan driver is broken. Try to reinstall
+```
+
+```{toctree}
+:maxdepth: 2
+:caption: Additional Resources
+
+additional_resources/performance_benchmarking
+
+```
\ No newline at end of file
diff --git a/docs/source/user_guide/tutorials/adding_robots.md b/docs/source/user_guide/tutorials/adding_robots.md
new file mode 100644
index 000000000..066f94cf3
--- /dev/null
+++ b/docs/source/user_guide/tutorials/adding_robots.md
@@ -0,0 +1,7 @@
+# Adding Robots
+
+
+TODO: Detail how to add and model a robot to run in ManiSkill/SAPIEN.
+- Cover working with URDFs, fixing common URDF issues
+- Cover disabling certain collisions for efficiency
+- Cover how to choose drive properties and how to determine when to create drives, tendons, etc.
\ No newline at end of file
diff --git a/docs/source/user_guide/tutorials/custom_reusable_scenes.md b/docs/source/user_guide/tutorials/custom_reusable_scenes.md
new file mode 100644
index 000000000..9426f05c4
--- /dev/null
+++ b/docs/source/user_guide/tutorials/custom_reusable_scenes.md
@@ -0,0 +1,4 @@
+# Custom Reusable Scenes
+
+In the [custom tasks tutorial](./custom_tasks.md) and the example [push_cube.py](mani_skill2/envs/tasks/push_cube.py) code, you may have noticed that they create a `TableSceneBuilder` object to load the scene's objects and initialize those objects, as well as some robots, to initial poses. These are classes that inherit the `SceneBuilder` class, which defines a few simple APIs necessary for building and initializing a scene in a task and allows you to easily re-use the scene you make across multiple tasks. It is not absolutely necessary to inherit `SceneBuilder`, but if used, your custom scene can easily be re-used for existing tasks in ManiSkill that allow randomizing/sampling the scene (e.g. mobile manipulation pick/place/open/close tasks sample random scenes and configurations).
+
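+A rough sketch of what such a class can look like is below. This is hedged: the import path and the hook names (`build`, `initialize`) are assumptions used for illustration, so check how `TableSceneBuilder` is defined in the codebase for the actual interface.
+
+```python
+# import path is an assumption for illustration; see where TableSceneBuilder lives in the codebase
+from mani_skill2.utils.scene_builder import SceneBuilder
+
+
+class MyFlatGroundSceneBuilder(SceneBuilder):
+    def build(self):
+        # called once at load time: create the objects that make up the scene
+        builder = self.scene.create_actor_builder()
+        builder.add_box_collision(half_size=[1.0, 1.0, 0.02])
+        builder.add_box_visual(half_size=[1.0, 1.0, 0.02])
+        self.ground_slab = builder.build_kinematic(name="ground_slab")
+
+    def initialize(self, env_idx):
+        # called on env.reset: set object/robot poses for the sub-scenes being reset
+        ...
+```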
diff --git a/docs/source/user_guide/tutorials/custom_tasks.md b/docs/source/user_guide/tutorials/custom_tasks.md
new file mode 100644
index 000000000..e4f741748
--- /dev/null
+++ b/docs/source/user_guide/tutorials/custom_tasks.md
@@ -0,0 +1,324 @@
+# Custom Tasks
+
+Building custom tasks in ManiSkill is straightforward and flexible. ManiSkill provides a number of features to help abstract away most of the GPU memory management required for parallel simulation and rendering.
+
+A custom task in ManiSkill is composed of the following core components:
+
+1. [Setting up the Task Class](#setting-up-the-task-class)
+2. [Loading (Robots, Assets, Sensors, etc.)](#loading) (done once)
+3. [Episode initialization / Randomization](#episode-initialization-randomization) (done every env.reset)
+4. [Success/Failure Condition](#successfailure-conditions) (done every env.step)
+5. [(Optional) Dense Reward Function](#optional-dense-reward-function) (done every env.step)
+6. [(Optional) Setting up cameras/sensors for observations and rendering/recording](#optional-setting-up-camerassensors-for-observations-and-recording) (done once)
+
+To follow this tutorial easily, we recommend reading it alongside the [annotated code for the PushCube task](https://github.com/haosulab/ManiSkill2/blob/dev/mani_skill2/envs/tasks/push_cube.py), which describes the purpose of nearly every line of code. The first few sections cover the bare minimum details necessary to start building your own tasks and show snippets of code from the PushCube task. The advanced sections cover additional topics for more advanced simulation and optimization, such as heterogeneous object simulation.
+
+If you want to skip the tutorial and start from a template you can use the [PushCube task](https://github.com/haosulab/ManiSkill2/blob/dev/mani_skill2/envs/tasks/push_cube.py) as a template, the [annotated template](https://github.com/haosulab/ManiSkill2/blob/dev/mani_skill2/envs/template.py), or the [bare minimum template](https://github.com/haosulab/ManiSkill2/blob/dev/mani_skill2/envs/minimal_template.py).
+
+
+
+If you have any questions or issues, feel free to ask in our [discord](https://discord.gg/vJb6wUmWXA) or on our [github](https://github.com/haosulab/ManiSkill2/issues)
+
+## Setting up the Task Class
+
+All tasks are defined by their own class and must inherit `BaseEnv`, similar to the design of many other robot learning simulation frameworks. You must then also register the class with a decorator so that the environment can be easily created via the `gym.make(env_id=...)` command in the future. Environment registration is done via `@register_env(env_id, max_episode_steps=...)`, where `max_episode_steps` indicates the time limit of the task.
+
+```python
+import sapien
+from mani_skill2.utils import sapien_utils
+from mani_skill2.envs.sapien_env import BaseEnv
+from mani_skill2.utils.registration import register_env
+
+@register_env("PushCube-v1", max_episode_steps=50)
+class PushCubeEnv(BaseEnv):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+```
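+
+Once registered, the task can be created like any other registered task (a short usage sketch; `my_package.my_push_cube` is a hypothetical module name standing in for wherever your task class lives, since importing it is what triggers the registration decorator):
+
+```python
+import gymnasium as gym
+
+import my_package.my_push_cube  # hypothetical: importing your module runs @register_env
+
+env = gym.make("PushCube-v1", obs_mode="state", control_mode="pd_joint_delta_pos")
+obs, _ = env.reset(seed=0)
+```
+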
+## Loading
+
+At the start of any task, you must load all objects (robots, assets, articulations, lighting, etc.) into the scene. This is also known as **reconfiguration** and generally only ever occurs once. Loading these objects is done in the `_load_actors` function of your custom task class. The objective is to simply load objects in, and nothing else. For GPU simulation, at this stage you cannot change object states (like pose, qpos); only initial poses can be modified. Changing/randomizing states is done in the section on [episode initialization / randomization](#episode-initialization-randomization).
+
+Building objects in ManiSkill is nearly identical to how it is done in SAPIEN. You create an `ActorBuilder` via `self.scene.create_actor_builder` and add visual and collision shapes via the actor builder. Visual shapes only affect visual rendering processes while collision shapes affect the physical simulation.
+
+#### Building Robots
+
+This is the simplest part and requires almost no additional work here. Robots are added in for you automatically and have their base initialized at 0. You can specify the default robot(s) via the `__init__` function. It is also strongly recommended to use proper typing to indicate which robots are supported and could be available. In PushCube this is done by adding class attributes and type annotations as shown below.
+
+```python
+from typing import Union
+
+from mani_skill2.agents.robots import Fetch, Panda, Xmate3Robotiq
+
+class PushCubeEnv(BaseEnv):
+
+    SUPPORTED_ROBOTS = ["panda", "xmate3_robotiq", "fetch"]
+
+    agent: Union[Panda, Xmate3Robotiq, Fetch]
+
+    def __init__(self, *args, robot_uids="panda", **kwargs):
+        # robot_uids="fetch" is possible, or even multi-robot setups via robot_uids=("fetch", "panda")
+        super().__init__(*args, robot_uids=robot_uids, **kwargs)
+```
+
+Initializing these robots occurs in the initialization / randomization section covered later. With this setup you can later access agent data via `self.agent` and the specific articulation data of the robot via `self.agent.robot`. For multi-robot setups you can access each agent via `self.agent.agents`.
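+
+For example, here is a minimal sketch of reading batched robot data (attribute names beyond `self.agent` and `self.agent.robot` are illustrative and may vary by robot):
+
+```python
+# inside any method of your task class
+robot_root_pose = self.agent.robot.pose   # batched pose of the robot articulation
+robot_qpos = self.agent.robot.qpos        # batched joint positions, one row per parallel environment
+```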
+
+#### Building Actors
+
+Building a **dynamic** actor like the cube in PushCube is done as follows
+```python
+def _load_actors(self):
+    # ...
+    builder = self.scene.create_actor_builder()
+    builder.add_box_collision(
+        # for boxes we specify the half length of each side
+        half_size=[0.02] * 3,
+    )
+    builder.add_box_visual(
+        half_size=[0.02] * 3,
+        material=sapien.render.RenderMaterial(
+            # RGBA values, this is a red cube
+            base_color=[1, 0, 0, 1],
+        ),
+    )
+    self.obj = builder.build(name="cube")
+    # PushCube has some other code after this (removed for brevity) that
+    # spawns a goal object (a red/white target) stored at self.goal_region
+```
+
+You can build a **kinematic** actor with `builder.build_kinematic` and a **static** actor with `builder.build_static`. A few sharp bits to keep in mind (a short sketch follows this list):
+- Dynamic actors can be moved around by forces/other objects (e.g. a robot) and fully physically simulated
+- Kinematic and static actors are fixed in place but can block objects from moving through them (e.g. a wall, a kitchen counter).
+- Kinematic actors can have their pose changed at any time. Static actors must have an initial pose set before calling `build_static` via `builder.initial_pose = ...`
+- Use static instead of kinematic whenever possible as it saves a lot of GPU memory
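+
+Here is a minimal sketch of building kinematic and static actors, following the same builder pattern as above; the box sizes, pose, and name are illustrative:
+
+```python
+builder = self.scene.create_actor_builder()
+builder.add_box_collision(half_size=[0.3, 0.3, 0.02])
+builder.add_box_visual(half_size=[0.3, 0.3, 0.02])
+
+# kinematic: fixed in place, but its pose can still be changed later
+# slab = builder.build_kinematic(name="slab")
+
+# static: cheapest on GPU memory, but the pose must be set before building
+builder.initial_pose = sapien.Pose(p=[0, 0, 0.5])
+slab = builder.build_static(name="slab")
+```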
+
+We also provide functions that build more complex shapes, which you can use by importing the following:
+```python
+from mani_skill2.utils.building import actors
+```
+
+Once built, the return value of `builder.build...` is an `Actor` object, which manages every parallel instance of the built object in each sub-scene. The `Actor` object exposes batched data, which makes it easy to build task rewards, success evaluations, etc.
+```python
+self.obj.pose.p # batched positions of shape (N, 3)
+self.obj.pose.q # batched quaternions of shape (N, 4)
+self.obj.linear_velocity # batched velocities of shape (N, 3)
+# and more ...
+```
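+
+These batched tensors can be used directly when writing rewards or success checks. As a small sketch, the per-environment planar distance between the cube and the goal target built earlier could be computed as:
+
+```python
+import torch
+
+# shape (N,): distance between the cube and the goal target in each sub-scene
+dist_to_goal = torch.linalg.norm(
+    self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], dim=-1
+)
+```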
+
+For object building, you can also use pre-built scene builders (tutorial on how to customize/make your own [here](./custom_reusable_scenes.md)). In PushCube it is done as follows
+```python
+def _load_actors(self):
+    self.table_scene = TableSceneBuilder(
+        env=self,
+    )
+    self.table_scene.build()
+    # ...
+```
+The TableSceneBuilder is well suited for building table-top tasks: it creates a table and floor for you and places the Fetch and Panda robots in reasonable locations.
+
+#### Building Articulations
+
+WIP
+
+#### Reconfiguring and Optimization
+
+In general, loading is quite slow, especially on the GPU, so by default ManiSkill reconfigures just once. Subsequent calls to `env.reset()` will not trigger a reconfiguration unless you call `env.reset(seed=seed, options=dict(reconfigure=True))` (the seed is not required, but is recommended for reproducibility when reconfiguring).
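+
+For example, to force a reconfiguration on a specific reset:
+
+```python
+# a regular reset keeps the already-loaded objects
+obs, _ = env.reset()
+# this reset reloads/rebuilds everything; the seed is optional but helps reproducibility
+obs, _ = env.reset(seed=0, options=dict(reconfigure=True))
+```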
+
+If you want calls to `env.reset()` to reconfigure by default, you can set a default value for `reconfiguration_freq` in your task's `__init__` function:
+
+```python
+def __init__(self, *args, robot_uids="panda", reconfiguration_freq=1, **kwargs):
+    super().__init__(*args, robot_uids=robot_uids, reconfiguration_freq=reconfiguration_freq, **kwargs)
+```
+
+A `reconfiguration_freq` of 1 means we reconfigure during every reset. A `reconfiguration_freq` of `k` means we reconfigure every `k` resets. A `reconfiguration_freq` of 0 (the default) means we never reconfigure again.
+
+One common use case for a positive `reconfiguration_freq` is simulating a task in parallel where each parallel environment works with a different object/articulation and there are far more object variants than parallel environments. For machine learning / RL workflows, setting `reconfiguration_freq` to e.g. 10 ensures the simulated objects are re-sampled every 10 resets, which diversifies the data collected for online training.
+
+
+## Episode Initialization / Randomization
+
+Task initialization and randomization is handled in the `_initialize_actors` function, which is called whenever `env.reset` is called. The objective here is to set the initial states of objects, including the robot. As the task should ideally be simulatable on the GPU, batched code is unavoidable. Furthermore, by default everything in ManiSkill stays batched, even if there is only one element.
+
+An example from part of the PushCube task
+
+```python
+from mani_skill2.utils.structs.pose import Pose
+import torch
+
+def _initialize_actors(self, env_idx: torch.Tensor):
+    # use the torch.device context manager to automatically create tensors on CPU or CUDA depending on self.device, the device the environment runs on
+    with torch.device(self.device):
+        b = len(env_idx)
+        # use the TableSceneBuilder to initialize all objects in that scene builder
+        self.table_scene.initialize(env_idx)
+
+        # here we write some randomization code that randomizes the x, y position of the cube we are pushing in the range [-0.1, -0.1] to [0.1, 0.1]
+        p = torch.zeros((b, 3))
+        p[..., :2] = torch.rand((b, 2)) * 0.2 - 0.1
+        p[..., 2] = self.cube_half_size
+        q = [1, 0, 0, 0]
+        obj_pose = Pose.create_from_pq(p=p, q=q)
+        self.obj.set_pose(obj_pose)
+```
+
+`env_idx`, one of the arguments to this function, is a tensor of IDs for the environments that need initialization. It is provided because ManiSkill supports partial resets, where at each timestep potentially only a subset of the parallel environments undergo a reset, which in turn calls `_initialize_actors` here.
+
+Since a scene builder is used, to initialize objects to their original states we simply call `self.table_scene.initialize(env_idx)`.
+
+In the PushCube task, we randomize the pose of the cube by generating a random xy position on the surface of the table (the surface of the table is at z = 0). Notice that we only generate `b = len(env_idx)` random positions as we only need to change `b` objects in `b` parallel environments that are undergoing resetting.
+
+ManiSkill further ensures that any modifications to object states are restricted to the objects in the parallel environments being initialized. Thus `self.obj.set_pose` will only accept a batched pose with `b` elements and will only ever change those `b` objects undergoing reset and initialization. The same applies to modifying articulation qpos via e.g. `self.my_articulation.qpos = ...` or setting velocities etc.
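+
+As a sketch, the same restriction applies when setting articulation states during initialization (the articulation name and DOF count below are hypothetical):
+
+```python
+# inside _initialize_actors: only the b environments listed in env_idx are modified
+num_dof = 8  # hypothetical degrees of freedom of self.my_articulation
+self.my_articulation.qpos = torch.zeros((b, num_dof))
+```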
+
+#### Working with Poses
+
+In robot simulation, every object has a pose, which represents the object's position and orientation as a 3D position vector and a 4D [quaternion](https://en.wikipedia.org/wiki/Quaternion).
+
+Another feature shown here is the `Pose` object, a simple wrapper around the original `sapien.Pose` object that allows you to manage a batch of poses on the GPU and perform transformations with poses. To create a `Pose` object you can use one of two options:
+
+```python
+Pose.create(raw_pose)
+Pose.create_from_pq(p=p, q=q)
+```
+
+`raw_pose` is a tensor of shape (b, 7), where the 3D position and 4D quaternion are concatenated to form a 7-dimensional vector per parallel environment.
+
+`p`, `q` are positions and quaternions. `Pose.create_from_pq` has a feature where it will accept unbatched arguments and batch+repeat `p` or `q` if the other value is batched. For example, in the PushCube sample we do
+```python
+# p here has shape (b, 3)
+q = [1, 0, 0, 0]
+obj_pose = Pose.create_from_pq(p=p, q=q)
+```
+While `q` is a flat array (python list) representing a single quaternion, `p` is a batch of `b` 3D positions. `Pose.create_from_pq` then creates a `Pose` object with batch size `b`, where the ith pose in `obj_pose` has position `p[i]` and the constant quaternion `q`. The same broadcasting works the other way around, which saves users from writing too much batching code themselves.
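+
+Below is a minimal sketch of both construction paths and how the broadcasting works (run on CPU for simplicity):
+
+```python
+import torch
+from mani_skill2.utils.structs.pose import Pose
+
+b = 4
+p = torch.rand((b, 3))                  # batch of b positions
+q = [1, 0, 0, 0]                        # a single identity quaternion
+pose_a = Pose.create_from_pq(p=p, q=q)  # q is repeated to match the batch size b
+
+q_batched = torch.tensor(q, dtype=torch.float32).repeat(b, 1)  # shape (b, 4)
+raw_pose = torch.cat([p, q_batched], dim=1)                    # shape (b, 7)
+pose_b = Pose.create(raw_pose)
+```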
+
+## Success/Failure Conditions
+
+For each task, at each timestep (when `env.step` is called) we need to evaluate the current state of the task, typically to see if it is in a success or fail state. In terms of the gym interface, if success or fail is True, then terminated is True and you can check the returned info object to see whether it was due to success or failure. In PushCube, the task is considered successful when the cube is pushed into the goal region, which is evaluated as follows
+```python
+def evaluate(self):
+    # success is achieved when the cube's xy position on the table is within the
+    # goal region's area (a circle centered at the goal region's xy position)
+    is_obj_placed = (
+        torch.linalg.norm(
+            self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], axis=1
+        )
+        < self.goal_radius
+    )
+
+    return {
+        "success": is_obj_placed,
+    }
+```
+
+The PushCube task does not define a fail condition, but you could define one yourself to check whether the cube falls off the table (in which case the task becomes impossible to solve), as sketched below.
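+
+A rough sketch of such a fail condition follows, assuming the evaluation dictionary can also carry a `fail` key (as the success/fail description above suggests); the height threshold is illustrative:
+
+```python
+def evaluate(self):
+    is_obj_placed = (
+        torch.linalg.norm(
+            self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], dim=1
+        )
+        < self.goal_radius
+    )
+    # the table surface is at z = 0, so a clearly negative z means the cube fell off
+    is_obj_fallen = self.obj.pose.p[..., 2] < -0.05
+    return {
+        "success": is_obj_placed,
+        "fail": is_obj_fallen,
+    }
+```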
+
+
+Note that some tasks, like the locomotion/control tasks in [dm-control](https://github.com/google-deepmind/dm_control/), have no success or failure evaluation. This kind of task is supported, and in those cases the evaluation function can simply return an empty dictionary.
+
+## (Optional) Dense Reward Function
+
+You can define a dense reward function and then a normalized version of it:
+```python
+def compute_dense_reward(self, obs: Any, action: Array, info: Dict):
+    # ...
+    return reward
+
+def compute_normalized_dense_reward(self, obs: Any, action: Array, info: Dict):
+    # this should be equal to compute_dense_reward / max possible reward
+    max_reward = 3.0
+    return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
+```
+
+`compute_normalized_dense_reward` is the default reward function used and is returned from `env.step`. We recommend defining a normalized reward function as these tend to be easier to learn from, especially in algorithms that learn Q functions in RL. The result of `compute_dense_reward` is returned when an environment is created with `gym.make(env_id=..., reward_mode="dense")`.
+
+Dense reward functions are not required and can be skipped. A sparse reward function is available automatically if the evaluation function returns a dictionary with the `success` key.
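+
+As an illustration only (this is not necessarily how PushCube's actual reward is written), a simple distance-based dense reward and its normalized version could look like:
+
+```python
+import torch
+
+def compute_dense_reward(self, obs, action, info):
+    # closer to the goal -> higher reward, mapped into [0, 1]
+    dist_to_goal = torch.linalg.norm(
+        self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], dim=1
+    )
+    reward = 1 - torch.tanh(5 * dist_to_goal)
+    # extra bonus for environments that are already successful
+    reward[info["success"]] = 2.0
+    return reward
+
+def compute_normalized_dense_reward(self, obs, action, info):
+    # the maximum possible reward above is 2.0
+    return self.compute_dense_reward(obs=obs, action=action, info=info) / 2.0
+```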
+
+## (Optional) Setting up Cameras/Sensors for Observations and Recording
+
+If you want your task to be able to return information from sensors like cameras as part of observations, you need to implement a `_register_sensors` function which should return a list of `SensorConfig` objects. At the moment the only sensors implemented are cameras; other forms of sensors will be added in the future. Adding a `_register_human_render_cameras` function will add cameras used to take pictures for the `"rgb_array"` render mode, which is usually used just for saving videos to look at but is never part of the actual environment observations.
+
+Below we show how to use `CameraConfig` to define sensors; you define the position, quaternion, width, height, fov, near, and far attributes.
+
+```python
+from mani_skill2.sensors.camera import CameraConfig
+
+def _register_sensors(self):
+    # registers one 128x128 camera looking at the robot, cube, and target
+    # a smaller sized camera will be lower quality, but render faster
+    pose = sapien_utils.look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
+    return [
+        CameraConfig("base_camera", pose.p, pose.q, 128, 128, 1, 0.01, 10)
+    ]
+
+def _register_human_render_cameras(self):
+    # registers a higher-definition (512x512) camera used just for rendering when render_mode="rgb_array" or when calling env.render_rgb_array()
+    pose = sapien_utils.look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
+    return CameraConfig("render_camera", pose.p, pose.q, 512, 512, 1, 0.01, 10)
+```
+
+To debug the registered cameras for sensors, you can visualize them by running
+
+```python
+import gymnasium as gym
+import matplotlib.pyplot as plt
+
+env = gym.make(your_env_id, render_mode="sensors")
+env.reset()
+img = env.render()
+plt.imshow(img)
+plt.show()
+```
+
+To visualize the human render you can change `render_mode` to "rgb_array".
+
+
\ No newline at end of file
diff --git a/docs/source/user_guide/tutorials/custom_tasks_advanced.md b/docs/source/user_guide/tutorials/custom_tasks_advanced.md
new file mode 100644
index 000000000..6d4e4cb1f
--- /dev/null
+++ b/docs/source/user_guide/tutorials/custom_tasks_advanced.md
@@ -0,0 +1,2 @@
+# Custom Tasks (Advanced Features)
+
diff --git a/docs/source/tutorials/domain_randomization.md b/docs/source/user_guide/tutorials/domain_randomization.md
similarity index 100%
rename from docs/source/tutorials/domain_randomization.md
rename to docs/source/user_guide/tutorials/domain_randomization.md
diff --git a/docs/source/tutorials/index.md b/docs/source/user_guide/tutorials/index.md
similarity index 76%
rename from docs/source/tutorials/index.md
rename to docs/source/user_guide/tutorials/index.md
index ed84ce564..cb5aef7eb 100644
--- a/docs/source/tutorials/index.md
+++ b/docs/source/user_guide/tutorials/index.md
@@ -1,6 +1,6 @@
# Tutorials
-These are tutorials written by the maintainers of ManiSkill and the community, spanning topics from how to build your own environment/task, to domain randomization, to reinforcement learning. The markdown tutorials are linked below with all tutorial code saved in the [examples/tutorials folder ](https://github.com/haosulab/ManiSkill2/blob/main/examples/tutorials). Some tutorials will also have jupyter notebooks / google colab links that let you run the tutorials without needing your own GPU.
+These are tutorials written by the maintainers of ManiSkill and the community, spanning topics from how to build your own task, to domain randomization, to reinforcement learning. The markdown tutorials are linked below with all tutorial code saved in the [examples/tutorials folder ](https://github.com/haosulab/ManiSkill2/blob/main/examples/tutorials). Some tutorials will also have jupyter notebooks / google colab links that let you run the tutorials without needing your own GPU.