diff --git a/rllib/BUILD b/rllib/BUILD index 6948f17e903c..1f46b6618f21 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2119,7 +2119,6 @@ py_test( # subdirectory: checkpoints/ # .................................... - py_test( name = "examples/checkpoints/checkpoint_by_custom_criteria", main = "examples/checkpoints/checkpoint_by_custom_criteria.py", @@ -2283,7 +2282,6 @@ py_test( # subdirectory: curriculum/ # .................................... - py_test( name = "examples/curriculum/curriculum_learning", main = "examples/curriculum/curriculum_learning.py", @@ -2295,7 +2293,6 @@ py_test( # subdirectory: debugging/ # .................................... - #@OldAPIStack py_test( name = "examples/debugging/deterministic_training_torch", @@ -2308,7 +2305,6 @@ py_test( # subdirectory: envs/ # .................................... - py_test( name = "examples/envs/custom_gym_env", main = "examples/envs/custom_gym_env.py", @@ -2449,7 +2445,6 @@ py_test( # subdirectory: gpus/ # .................................... - py_test( name = "examples/gpus/fractional_0.5_gpus_per_learner", main = "examples/gpus/fractional_gpus_per_learner.py", @@ -2469,7 +2464,6 @@ py_test( # subdirectory: hierarchical/ # .................................... - #@OldAPIStack py_test( name = "examples/hierarchical/hierarchical_training_tf", @@ -2492,7 +2486,6 @@ py_test( # subdirectory: inference/ # .................................... - #@OldAPIStack py_test( name = "examples/inference/policy_inference_after_training_tf", @@ -2905,6 +2898,15 @@ py_test( # subdirectory: rl_modules/ # .................................... +py_test( + name = "examples/rl_modules/custom_rl_module", + main = "examples/rl_modules/custom_rl_module.py", + tags = ["team:rllib", "examples"], + size = "medium", + srcs = ["examples/rl_modules/custom_rl_module.py"], + args = ["--enable-new-api-stack", "--stop-iters=3"], +) + #@OldAPIStack @HybridAPIStack py_test( name = "examples/rl_modules/classes/mobilenet_rlm_hybrid_api_stack", diff --git a/rllib/algorithms/ppo/torch/ppo_torch_rl_module.py b/rllib/algorithms/ppo/torch/ppo_torch_rl_module.py index 46b1f636d7b6..78c041878d1e 100644 --- a/rllib/algorithms/ppo/torch/ppo_torch_rl_module.py +++ b/rllib/algorithms/ppo/torch/ppo_torch_rl_module.py @@ -22,8 +22,7 @@ def setup(self): super().setup() # If not an inference-only module (e.g., for evaluation), set up the - # parameter names to be removed or renamed when syncing from the state dict - # when synching. + # parameter names to be removed or renamed when syncing from the state dict. if not self.inference_only: # Set the expected and unexpected keys for the inference-only module. 
self._set_inference_only_state_dict_keys() diff --git a/rllib/core/rl_module/rl_module.py b/rllib/core/rl_module/rl_module.py index 691f9c688b5a..5d71fecf13d7 100644 --- a/rllib/core/rl_module/rl_module.py +++ b/rllib/core/rl_module/rl_module.py @@ -2,7 +2,7 @@ import datetime import json import pathlib -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Mapping, Any, TYPE_CHECKING, Optional, Type, Dict, Union import gymnasium as gym @@ -203,7 +203,7 @@ class RLModuleConfig: observation_space: gym.Space = None action_space: gym.Space = None - model_config_dict: Dict[str, Any] = None + model_config_dict: Dict[str, Any] = field(default_factory=dict) catalog_class: Type["Catalog"] = None def get_catalog(self) -> "Catalog": @@ -456,7 +456,8 @@ def setup(self): This is called automatically during the __init__ method of this class, therefore, the subclass should call super.__init__() in its constructor. This - abstraction can be used to create any component that your RLModule needs. + abstraction can be used to create any components (e.g. NN layers) that your + RLModule needs. """ return None @@ -464,14 +465,14 @@ def setup(self): def get_train_action_dist_cls(self) -> Type[Distribution]: """Returns the action distribution class for this RLModule used for training. - This class is used to create action distributions from outputs of the - forward_train method. If the case that no action distribution class is needed, + This class is used to get the correct action distribution class to be used by + the training components. In case that no action distribution class is needed, this method can return None. Note that RLlib's distribution classes all implement the `Distribution` interface. This requires two special methods: `Distribution.from_logits()` and - `Distribution.to_deterministic()`. See the documentation for `Distribution` - for more detail. + `Distribution.to_deterministic()`. See the documentation of the + :py:class:`~ray.rllib.models.distributions.Distribution` class for more details. """ raise NotImplementedError @@ -485,8 +486,8 @@ def get_exploration_action_dist_cls(self) -> Type[Distribution]: Note that RLlib's distribution classes all implement the `Distribution` interface. This requires two special methods: `Distribution.from_logits()` and - `Distribution.to_deterministic()`. See the documentation for `Distribution` - for more detail. + `Distribution.to_deterministic()`. See the documentation of the + :py:class:`~ray.rllib.models.distributions.Distribution` class for more details. """ raise NotImplementedError @@ -500,8 +501,8 @@ def get_inference_action_dist_cls(self) -> Type[Distribution]: Note that RLlib's distribution classes all implement the `Distribution` interface. This requires two special methods: `Distribution.from_logits()` and - `Distribution.to_deterministic()`. See the documentation for `Distribution` - for more detail. + `Distribution.to_deterministic()`. See the documentation of the + :py:class:`~ray.rllib.models.distributions.Distribution` class for more details. """ raise NotImplementedError @@ -596,9 +597,7 @@ def output_specs_inference(self) -> SpecType: a dict that has `action_dist` key and its value is an instance of `Distribution`. """ - # TODO (sven): We should probably change this to [ACTION_DIST_INPUTS], b/c this - # is what most algos will do. 
- return {"action_dist": Distribution} + return [Columns.ACTION_DIST_INPUTS] @OverrideToImplementCustomLogic_CallToSuperRecommended def output_specs_exploration(self) -> SpecType: @@ -609,9 +608,7 @@ def output_specs_exploration(self) -> SpecType: a dict that has `action_dist` key and its value is an instance of `Distribution`. """ - # TODO (sven): We should probably change this to [ACTION_DIST_INPUTS], b/c this - # is what most algos will do. - return {"action_dist": Distribution} + return [Columns.ACTION_DIST_INPUTS] def output_specs_train(self) -> SpecType: """Returns the output specs of the forward_train method.""" diff --git a/rllib/core/rl_module/torch/torch_rl_module.py b/rllib/core/rl_module/torch/torch_rl_module.py index 0ced41878552..883b39f26f99 100644 --- a/rllib/core/rl_module/torch/torch_rl_module.py +++ b/rllib/core/rl_module/torch/torch_rl_module.py @@ -21,47 +21,6 @@ torch, nn = try_import_torch() -def compile_wrapper(rl_module: "TorchRLModule", compile_config: TorchCompileConfig): - """A wrapper that compiles the forward methods of a TorchRLModule.""" - - # TODO(Artur): Remove this once our requirements enforce torch >= 2.0.0 - # Check if torch framework supports torch.compile. - if ( - torch is not None - and version.parse(torch.__version__) < TORCH_COMPILE_REQUIRED_VERSION - ): - raise ValueError("torch.compile is only supported from torch 2.0.0") - - compiled_forward_train = torch.compile( - rl_module._forward_train, - backend=compile_config.torch_dynamo_backend, - mode=compile_config.torch_dynamo_mode, - **compile_config.kwargs - ) - - rl_module._forward_train = compiled_forward_train - - compiled_forward_inference = torch.compile( - rl_module._forward_inference, - backend=compile_config.torch_dynamo_backend, - mode=compile_config.torch_dynamo_mode, - **compile_config.kwargs - ) - - rl_module._forward_inference = compiled_forward_inference - - compiled_forward_exploration = torch.compile( - rl_module._forward_exploration, - backend=compile_config.torch_dynamo_backend, - mode=compile_config.torch_dynamo_mode, - **compile_config.kwargs - ) - - rl_module._forward_exploration = compiled_forward_exploration - - return rl_module - - class TorchRLModule(nn.Module, RLModule): """A base class for RLlib PyTorch RLModules. @@ -234,3 +193,44 @@ class TorchDDPRLModuleWithTargetNetworksInterface( @override(RLModuleWithTargetNetworksInterface) def get_target_network_pairs(self) -> List[Tuple[NetworkType, NetworkType]]: return self.module.get_target_network_pairs() + + +def compile_wrapper(rl_module: "TorchRLModule", compile_config: TorchCompileConfig): + """A wrapper that compiles the forward methods of a TorchRLModule.""" + + # TODO(Artur): Remove this once our requirements enforce torch >= 2.0.0 + # Check if torch framework supports torch.compile. 
+ if ( + torch is not None + and version.parse(torch.__version__) < TORCH_COMPILE_REQUIRED_VERSION + ): + raise ValueError("torch.compile is only supported from torch 2.0.0") + + compiled_forward_train = torch.compile( + rl_module._forward_train, + backend=compile_config.torch_dynamo_backend, + mode=compile_config.torch_dynamo_mode, + **compile_config.kwargs, + ) + + rl_module._forward_train = compiled_forward_train + + compiled_forward_inference = torch.compile( + rl_module._forward_inference, + backend=compile_config.torch_dynamo_backend, + mode=compile_config.torch_dynamo_mode, + **compile_config.kwargs, + ) + + rl_module._forward_inference = compiled_forward_inference + + compiled_forward_exploration = torch.compile( + rl_module._forward_exploration, + backend=compile_config.torch_dynamo_backend, + mode=compile_config.torch_dynamo_mode, + **compile_config.kwargs, + ) + + rl_module._forward_exploration = compiled_forward_exploration + + return rl_module diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py index 3025689255f2..582806dd193b 100644 --- a/rllib/env/single_agent_env_runner.py +++ b/rllib/env/single_agent_env_runner.py @@ -91,12 +91,9 @@ def __init__(self, config: AlgorithmConfig, **kwargs): try: module_spec: SingleAgentRLModuleSpec = self.config.rl_module_spec module_spec.observation_space = self._env_to_module.observation_space - # TODO (simon): The `gym.Wrapper` for `gym.vector.VectorEnv` should - # actually hold the spaces for a single env, but for boxes the - # shape is (1, 1) which brings a problem with the action dists. - # shape=(1,) is expected. module_spec.action_space = self.env.envs[0].action_space - module_spec.model_config_dict = self.config.model_config + if module_spec.model_config_dict is None: + module_spec.model_config_dict = self.config.model_config # Only load a light version of the module, if available. This is useful # if the the module has target or critic networks not needed in sampling # or inference. diff --git a/rllib/examples/rl_modules/action_masking_rlm.py b/rllib/examples/rl_modules/action_masking_rlm.py deleted file mode 100644 index 68bde8c8a8f2..000000000000 --- a/rllib/examples/rl_modules/action_masking_rlm.py +++ /dev/null @@ -1,6 +0,0 @@ -msg = """ -This script has been moved to -`ray.rllib.examples.rl_modules.classes.action_masking_rlm.py` -""" - -raise NotImplementedError(msg) diff --git a/rllib/examples/rl_modules/classes/tiny_atari_cnn.py b/rllib/examples/rl_modules/classes/tiny_atari_cnn.py new file mode 100644 index 000000000000..2f45cf219734 --- /dev/null +++ b/rllib/examples/rl_modules/classes/tiny_atari_cnn.py @@ -0,0 +1,178 @@ +from ray.rllib.core.columns import Columns +from ray.rllib.core.rl_module.torch import TorchRLModule +from ray.rllib.models.torch.torch_distributions import TorchCategorical +from ray.rllib.models.torch.misc import normc_initializer +from ray.rllib.models.torch.misc import same_padding, valid_padding +from ray.rllib.utils.annotations import override +from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.torch_utils import convert_to_torch_tensor + +torch, nn = try_import_torch() + + +class TinyAtariCNN(TorchRLModule): + """A tiny CNN stack for fast-learning of Atari envs. + + The architecture here is the exact same as the one used by the old API stack as + CNN default ModelV2. + + We stack 3 CNN layers based on the config, then a 4th one with linear activation + and n 1x1 filters, where n is the number of actions in the (discrete) action space. 
+ Simple reshaping (no flattening or extra linear layers necessary) leads to the + action logits, which can directly be used inside a distribution or loss. + """ + + @override(TorchRLModule) + def setup(self): + """Use this method to create all the model components that you require. + + Feel free to access the following useful properties in this class: + - `self.config.model_config_dict`: The config dict for this RLModule class, + which should contain flexible settings, for example: {"hiddens": [256, 256]}. + - `self.config.observation|action_space`: The observation and action space that + this RLModule is subject to. Note that the observation space might not be the + exact space from your env, but that it might have already gone through + preprocessing through a connector pipeline (for example, flattening, + frame-stacking, mean/std-filtering, etc.). + """ + # Get the CNN stack config from our RLModuleConfig's (self.config) + # `model_config_dict` property: + if "conv_filters" in self.config.model_config_dict: + conv_filters = self.config.model_config_dict["conv_filters"] + # Default CNN stack with 3 layers: + else: + conv_filters = [ + [16, 4, 2, "same"], # num filters, kernel wxh, stride wxh, padding type + [32, 4, 2, "same"], + [256, 11, 1, "valid"], + ] + + # Build the CNN layers. + layers = [] + + # Add user-specified hidden convolutional layers first. + width, height, in_depth = self.config.observation_space.shape + in_size = [width, height] + for filter_specs in conv_filters: + out_depth, kernel_size, strides, padding = filter_specs + + # Pad like in tensorflow's SAME/VALID mode. + if padding == "same": + padding_size, out_size = same_padding(in_size, kernel_size, strides) + layers.append(nn.ZeroPad2d(padding_size)) + # No actual padding is performed for "valid" mode, but we will still + # compute the output size (input for the next layer). + else: + out_size = valid_padding(in_size, kernel_size, strides) + + layer = nn.Conv2d(in_depth, out_depth, kernel_size, strides, bias=True) + # Initialize CNN layer kernel. + nn.init.xavier_uniform_(layer.weight) + # Initialize CNN layer bias. + nn.init.zeros_(layer.bias) + + layers.append(layer) + + # Activation. + layers.append(nn.ReLU()) + + in_size = out_size + in_depth = out_depth + + self._base_cnn_stack = nn.Sequential(*layers) + + # Add the final CNN 1x1 layer with num_filters == num_actions to be reshaped to + # yield the logits (no flattening, no additional linear layers required). + self._logits = nn.Sequential( + nn.ZeroPad2d(same_padding(in_size, 1, 1)[0]), + nn.Conv2d(in_depth, self.config.action_space.n, 1, 1, bias=True), + ) + self._values = nn.Linear(in_depth, 1) + # Mimic old API stack behavior of initializing the value function with `normc` + # std=0.01. + normc_initializer(0.01)(self._values.weight) + + @override(TorchRLModule) + def _forward_inference(self, batch, **kwargs): + # Compute the basic 1D feature tensor (inputs to policy- and value-heads). + _, logits = self._compute_features_and_logits(batch) + # Return logits as ACTION_DIST_INPUTS (categorical distribution). + return {Columns.ACTION_DIST_INPUTS: logits} + + @override(TorchRLModule) + def _forward_exploration(self, batch, **kwargs): + return self._forward_inference(batch, **kwargs) + + @override(TorchRLModule) + def _forward_train(self, batch, **kwargs): + # Compute the basic 1D feature tensor (inputs to policy- and value-heads).
+ features, logits = self._compute_features_and_logits(batch) + # Besides the action logits, we also have to return value predictions here + # (to be used inside the loss function). + values = self._values(features).squeeze(-1) + return { + Columns.ACTION_DIST_INPUTS: logits, + Columns.VF_PREDS: values, + } + + # TODO (sven): We still need to define the distribution to use here, even though + # we have a pretty standard action space (Discrete), which should simply always map + # to a categorical dist. by default. + @override(TorchRLModule) + def get_inference_action_dist_cls(self): + return TorchCategorical + + @override(TorchRLModule) + def get_exploration_action_dist_cls(self): + return TorchCategorical + + @override(TorchRLModule) + def get_train_action_dist_cls(self): + return TorchCategorical + + # TODO (sven): In order for this RLModule to work with PPO, we must define + # our own `_compute_values()` method. This would become more obvious if we simply + # subclassed the `PPOTorchRLModule` directly here (which we didn't do for + # simplicity and to keep some generality). We might even get rid of algo- + # specific RLModule subclasses altogether in the future and replace them + # by mere algo-specific APIs (w/o any actual implementations). + def _compute_values(self, batch, device): + obs = convert_to_torch_tensor(batch[Columns.OBS], device=device) + features = self._base_cnn_stack(obs.permute(0, 3, 1, 2)) + features = torch.squeeze(features, dim=[-1, -2]) + return self._values(features).squeeze(-1) + + def _compute_features_and_logits(self, batch): + obs = batch[Columns.OBS].permute(0, 3, 1, 2) + features = self._base_cnn_stack(obs) + logits = self._logits(features) + return ( + torch.squeeze(features, dim=[-1, -2]), + torch.squeeze(logits, dim=[-1, -2]), + ) + + +if __name__ == "__main__": + import numpy as np + import gymnasium as gym + from ray.rllib.core.rl_module.rl_module import RLModuleConfig + + rl_module_config = RLModuleConfig( + observation_space=gym.spaces.Box(-1.0, 1.0, (42, 42, 4), np.float32), + action_space=gym.spaces.Discrete(4), + ) + my_net = TinyAtariCNN(rl_module_config) + + B = 10 + w = 42 + h = 42 + c = 4 + data = torch.from_numpy( + np.random.random_sample(size=(B, w, h, c)).astype(np.float32) + ) + print(my_net.forward_inference({"obs": data})) + print(my_net.forward_exploration({"obs": data})) + print(my_net.forward_train({"obs": data})) + + num_all_params = sum(int(np.prod(p.size())) for p in my_net.parameters()) + print(f"num params = {num_all_params}") diff --git a/rllib/examples/rl_modules/custom_rl_module.py b/rllib/examples/rl_modules/custom_rl_module.py new file mode 100644 index 000000000000..b2f407946071 --- /dev/null +++ b/rllib/examples/rl_modules/custom_rl_module.py @@ -0,0 +1,117 @@ +"""Example of implementing and configuring a custom (torch) RLModule. + +This example: + - demonstrates how you can subclass the TorchRLModule base class and set up your + own neural network architecture by overriding `setup()`. + - shows how to override the 3 forward methods: `_forward_inference()`, + `_forward_exploration()`, and `_forward_train()` to implement your own custom forward + logic. You will also learn when each of these 3 methods is called by RLlib or + the users of your RLModule. + - shows how you then configure an RLlib Algorithm such that it uses your custom + RLModule (instead of a default RLModule). + +We implement a tiny CNN stack here, the exact same one that is used by the old API +stack as the default CNN net.
It comprises 4 convolutional layers, the last of which +uses a 1x1 kernel size and a number of filters that exactly matches the number of +discrete actions (logits). This way, the (non-activated) output of the last layer only +needs to be reshaped to yield the policy's logit outputs. No flattening +or additional dense layer is required. + +The network is then used in a fast ALE/Pong-v5 experiment. + + +How to run this script +---------------------- +`python [script file name].py --enable-new-api-stack` + +For debugging, use the following additional command line options +`--no-tune --num-env-runners=0` +which should allow you to set breakpoints anywhere in the RLlib code and +have the execution stop there for inspection and debugging. + +For logging to your WandB account, use: +`--wandb-key=[your WandB API key] --wandb-project=[some project name] +--wandb-run-name=[optional: WandB run name (within the defined project)]` + + +Results to expect +----------------- +You should see the following output (during the experiment) in your console: + +Number of trials: 1/1 (1 RUNNING) ++---------------------+----------+----------------+--------+------------------+ +| Trial name | status | loc | iter | total time (s) | +| | | | | | +|---------------------+----------+----------------+--------+------------------+ +| PPO_env_82b44_00000 | RUNNING | 127.0.0.1:9718 | 1 | 98.3585 | ++---------------------+----------+----------------+--------+------------------+ ++------------------------+------------------------+------------------------+ +| num_env_steps_sample | num_env_steps_traine | num_episodes_lifetim | +| d_lifetime | d_lifetime | e | +|------------------------+------------------------+------------------------| +| 4000 | 4000 | 4 | ++------------------------+------------------------+------------------------+ +""" +import gymnasium as gym + +from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec +from ray.rllib.env.wrappers.atari_wrappers import wrap_atari_for_new_api_stack +from ray.rllib.examples.rl_modules.classes.tiny_atari_cnn import TinyAtariCNN +from ray.rllib.utils.test_utils import ( + add_rllib_example_script_args, + run_rllib_example_script_experiment, +) +from ray.tune.registry import get_trainable_cls, register_env + +parser = add_rllib_example_script_args(default_iters=100, default_timesteps=600000) +parser.set_defaults(env="ALE/Pong-v5") + + +if __name__ == "__main__": + args = parser.parse_args() + + assert ( + args.enable_new_api_stack + ), "Must set --enable-new-api-stack when running this script!" + + register_env( + "env", + lambda cfg: wrap_atari_for_new_api_stack( + gym.make(args.env, **cfg), + dim=42, # <- need images to be "tiny" for our custom model + framestack=4, + ), + ) + + base_config = ( + get_trainable_cls(args.algo) + .get_default_config() + .environment( + env="env", + env_config=dict( + frameskip=1, + full_action_space=False, + repeat_action_probability=0.0, + ), + ) + .rl_module( + # Plug in our custom RLModule class. + rl_module_spec=SingleAgentRLModuleSpec( + module_class=TinyAtariCNN, + ), + # Feel free to specify your own `model_config_dict` settings below. + # The `model_config_dict` defined here will be available inside your custom + # RLModule class through the `self.config.model_config_dict` property.
+ model_config_dict={ + "conv_filters": [ + # num filters, kernel wxh, stride wxh, padding type + [16, 4, 2, "same"], + [32, 4, 2, "same"], + [64, 4, 2, "same"], + ], + }, + ) + ) + + run_rllib_example_script_experiment(base_config, args, stop={}) diff --git a/rllib/examples/rl_modules/episode_env_aware_rlm.py b/rllib/examples/rl_modules/episode_env_aware_rlm.py deleted file mode 100644 index 9cafd034ec0b..000000000000 --- a/rllib/examples/rl_modules/episode_env_aware_rlm.py +++ /dev/null @@ -1,6 +0,0 @@ -msg = """ -This script has been moved to -`ray.rllib.examples.rl_modules.classes.random_rlm.py::StatefulRandomRLModule` -""" - -raise NotImplementedError(msg) diff --git a/rllib/examples/rl_modules/frame_stacking_rlm.py b/rllib/examples/rl_modules/frame_stacking_rlm.py deleted file mode 100644 index 4ed592fa8705..000000000000 --- a/rllib/examples/rl_modules/frame_stacking_rlm.py +++ /dev/null @@ -1,12 +0,0 @@ -msg = """ -This script has been taken out of RLlib b/c: -- This script used `ViewRequirements` ("Trajectory View API") to set up the RLModule, -however, this API will not be part of the new API stack. -Instead, you can use RLlib's built-in ConnectorV2 for frame stacking (or write a custom -ConnectorV2). Take a look at this example script here, which shows how you can do frame- -stacking with RLlib's new ConnectorV2 API. - -`ray.rllib.examples.connectors.frame_stacking.py` -""" - -raise NotImplementedError(msg) diff --git a/rllib/examples/rl_modules/mobilenet_rlm.py b/rllib/examples/rl_modules/mobilenet_rlm.py deleted file mode 100644 index 84f57d0566e0..000000000000 --- a/rllib/examples/rl_modules/mobilenet_rlm.py +++ /dev/null @@ -1,6 +0,0 @@ -msg = """ -This script has been moved to -`ray.rllib.examples.rl_modules.classes.mobilenet_rlm.py` -""" - -raise NotImplementedError(msg) diff --git a/rllib/examples/rl_modules/random_rl_module.py b/rllib/examples/rl_modules/random_rl_module.py deleted file mode 100644 index eac2d59ddf61..000000000000 --- a/rllib/examples/rl_modules/random_rl_module.py +++ /dev/null @@ -1,6 +0,0 @@ -msg = """ -This script has been moved to -`ray.rllib.examples.rl_modules.classes.random_rlm.py` -""" - -raise NotImplementedError(msg)
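Usage sketch (not part of the diff above): the following minimal, self-contained snippet illustrates how the new TinyAtariCNN plays together with the updated default `output_specs_inference()` (now `[Columns.ACTION_DIST_INPUTS]`) and with `get_inference_action_dist_cls()`. The observation/action spaces and batch shape are assumptions copied from the module's own `__main__` smoke test; no `model_config_dict` is passed, which relies on `RLModuleConfig.model_config_dict` now defaulting to an empty dict so the built-in conv-filter stack is used.

import numpy as np
import gymnasium as gym
import torch

from ray.rllib.core.columns import Columns
from ray.rllib.core.rl_module.rl_module import RLModuleConfig
from ray.rllib.examples.rl_modules.classes.tiny_atari_cnn import TinyAtariCNN

# Build the module standalone, outside of any Algorithm (assumed spaces:
# 42x42x4 framestacked observations, 4 discrete actions).
config = RLModuleConfig(
    observation_space=gym.spaces.Box(-1.0, 1.0, (42, 42, 4), np.float32),
    action_space=gym.spaces.Discrete(4),
)
module = TinyAtariCNN(config)

# A dummy batch of 2 observations.
batch = {Columns.OBS: torch.rand(2, 42, 42, 4)}

# forward_inference() returns the raw logits under Columns.ACTION_DIST_INPUTS.
out = module.forward_inference(batch)
logits = out[Columns.ACTION_DIST_INPUTS]

# Turn the logits into greedy actions through the module's own inference
# distribution class, roughly the way RLlib's module-to-env pipeline does it.
dist_cls = module.get_inference_action_dist_cls()  # -> TorchCategorical
actions = dist_cls.from_logits(logits).to_deterministic().sample()
print(actions)  # tensor of shape (2,) with action indices in {0, ..., 3}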