diff --git a/docs/source/community_tutorials.md b/docs/source/community_tutorials.md
index 50450eb563..4b2b9a6e54 100644
--- a/docs/source/community_tutorials.md
+++ b/docs/source/community_tutorials.md
@@ -10,6 +10,7 @@ Community tutorials are made by active members of the Hugging Face community tha
 | Structured Generation | [`SFTTrainer`] | Fine-tuning Llama-2-7B to generate Persian product catalogs in JSON using QLoRA and PEFT | [Mohammadreza Esmaeilian](https://huggingface.co/Mohammadreza) | [Link](https://huggingface.co/learn/cookbook/en/fine_tuning_llm_to_generate_persian_product_catalogs_in_json_format) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/cookbook/blob/main/notebooks/en/fine_tuning_llm_to_generate_persian_product_catalogs_in_json_format.ipynb) |
 | Preference Optimization | [`DPOTrainer`] | Align Mistral-7b using Direct Preference Optimization for human preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/Fine_tune_Mistral_7b_with_DPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb) |
 | Preference Optimization | [`ORPOTrainer`] | Fine-tuning Llama 3 with ORPO combining instruction tuning and preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/2024-04-19_Fine_tune_Llama_3_with_ORPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eHNWg9gnaXErdAa8_mcvjMupbSS6rDvi) |
+| Instruction tuning | [`SFTTrainer`] | How to fine-tune open LLMs in 2025 with Hugging Face | [Philipp Schmid](https://huggingface.co/philschmid) | [Link](https://www.philschmid.de/fine-tune-llms-in-2025) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/philschmid/deep-learning-pytorch-huggingface/blob/main/training/fine-tune-llms-in-2025.ipynb) |
@@ -25,4 +26,4 @@ Community tutorials are made by active members of the Hugging Face community tha
 
 ## Contributing
 
-If you have a tutorial that you would like to add to this list, please open a PR to add it. We will review it and merge it if it is relevant to the community.
\ No newline at end of file
+If you have a tutorial that you would like to add to this list, please open a PR to add it. We will review it and merge it if it is relevant to the community.
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
index 6861b6e83e..2672d37f9c 100644
--- a/tests/test_rloo_trainer.py
+++ b/tests/test_rloo_trainer.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import platform
-import subprocess
 import tempfile
 import unittest
 
@@ -24,34 +22,6 @@
 from trl import RLOOConfig, RLOOTrainer
 
 
-def test():
-    command = """\
-python examples/scripts/rloo/rloo.py \
-    --dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
-    --dataset_train_split descriptiveness \
-    --learning_rate 3e-6 \
-    --output_dir models/minimal/rloo \
-    --per_device_train_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --total_episodes 10 \
-    --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --missing_eos_penalty 1.0 \
-    --save_strategy no \
-    --stop_token eos
-"""
-    if platform.system() == "Windows":
-        # windows CI does not work with subprocesses for some reason
-        # e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743
-        return
-    subprocess.run(
-        command,
-        shell=True,
-        check=True,
-    )
-
-
 class RLOOTrainerTester(unittest.TestCase):
     def setUp(self):
         self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
diff --git a/trl/trainer/bco_config.py b/trl/trainer/bco_config.py
index 10cd82b9f5..b3398ae914 100644
--- a/trl/trainer/bco_config.py
+++ b/trl/trainer/bco_config.py
@@ -46,6 +46,8 @@ class BCOConfig(TrainingArguments):
         truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
             Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
             This argument is required if you want to use the default data collator.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model and reference model.
         generate_during_eval (`bool`, *optional*, defaults to `False`):
             If `True`, generates and logs completions from both the model and the reference model to W&B during
             evaluation.
@@ -78,6 +80,7 @@
     label_pad_token_id: int = -100
     padding_value: Optional[int] = None
     truncation_mode: str = "keep_end"
+    disable_dropout: bool = True
     generate_during_eval: bool = False
    is_encoder_decoder: Optional[bool] = None
    precompute_ref_log_probs: bool = False
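# Usage sketch for the `disable_dropout` option introduced in BCOConfig above.
# This snippet is illustrative and not part of the patch; the output path is a
# placeholder.
from trl import BCOConfig

training_args = BCOConfig(
    output_dir="bco-model",   # hypothetical path
    disable_dropout=True,     # default: dropout is zeroed in both the policy and the reference model
)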
diff --git a/trl/trainer/bco_trainer.py b/trl/trainer/bco_trainer.py
index c2d58ab3f2..1c26516793 100644
--- a/trl/trainer/bco_trainer.py
+++ b/trl/trainer/bco_trainer.py
@@ -309,8 +309,6 @@ class BCOTrainer(Trainer):
             The function to use to preprocess the logits before computing the metrics.
         peft_config (`dict`, defaults to `None`):
             The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
-        disable_dropout (`bool`, defaults to `True`):
-            Whether or not to disable dropouts in `model` and `ref_model`.
         compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
             The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string
             to metric values.
@@ -538,10 +536,11 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
-        # disable dropout in the model and reference model
-        disable_dropout_in_model(model)
-        if self.ref_model is not None:
-            disable_dropout_in_model(self.ref_model)
+        # Disable dropout in the model and reference model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+            if self.ref_model is not None:
+                disable_dropout_in_model(self.ref_model)
 
         self.max_length = max_length
         self.generate_during_eval = args.generate_during_eval
diff --git a/trl/trainer/cpo_trainer.py b/trl/trainer/cpo_trainer.py
index 6d236cfb37..4720517b6a 100644
--- a/trl/trainer/cpo_trainer.py
+++ b/trl/trainer/cpo_trainer.py
@@ -268,6 +268,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model
         if args.disable_dropout:
             disable_dropout_in_model(model)
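# For context on the `disable_dropout_in_model` helper gated above: it behaves
# roughly like the sketch below, walking the module tree and zeroing every dropout
# probability. This is an illustrative paraphrase, not a verbatim copy of the
# helper in trl.trainer.utils.
import torch.nn as nn

def disable_dropout_sketch(model: nn.Module) -> None:
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.p = 0.0  # makes the layer a no-op during training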
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index 3c4c7771b2..d820857de1 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -30,7 +30,7 @@
 import transformers
 from accelerate import PartialState
 from accelerate.utils import is_deepspeed_available, tqdm
-from datasets import Dataset
+from datasets import Dataset, IterableDataset
 from packaging import version
 from torch.utils.data import DataLoader
 from transformers import (
@@ -376,6 +376,7 @@ def make_inputs_require_grad(module, input, output):
         if data_collator is None:
             data_collator = PreferenceCollator(pad_token_id=self.padding_value)
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
             if self.ref_model is not None:
@@ -436,53 +437,16 @@
         # that the warning has already been issued.
         model.warnings_issued["estimate_tokens"] = True
 
-        # Compute that only on the main process for faster data processing.
-        # see: https://github.com/huggingface/trl/pull/1255
-        with PartialState().local_main_process_first():
-            # Extract the prompt if needed, and apply the chat template if needed
-            train_dataset = train_dataset.map(
-                maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from train dataset"
-            )
-            train_dataset = train_dataset.map(
-                maybe_apply_chat_template,
-                fn_kwargs={"tokenizer": processing_class},
-                num_proc=args.dataset_num_proc,
-                desc="Applying chat template to train dataset",
-            )
-            if eval_dataset is not None:
-                eval_dataset = eval_dataset.map(
-                    maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from eval dataset"
-                )
-                eval_dataset = eval_dataset.map(
-                    maybe_apply_chat_template,
-                    fn_kwargs={"tokenizer": processing_class},
-                    num_proc=args.dataset_num_proc,
-                    desc="Applying chat template to eval dataset",
-                )
-
-            # tokenize the dataset, lower writer batch size to avoid OOM (frequent in vision models)
-            fn_kwargs = {
-                "processing_class": processing_class,
-                "max_prompt_length": args.max_prompt_length,
-                "max_completion_length": args.max_completion_length,
-                # for enc-dec, we add the special tokens ([bos_token] + prompt + [eos_token]; completion + [eos_token])
-                "add_special_tokens": self.is_encoder_decoder,
-            }
-            train_dataset = train_dataset.map(
-                self.tokenize_row if not self.is_vision_model else self.process_row,
-                fn_kwargs=fn_kwargs,
-                num_proc=self.dataset_num_proc,
-                writer_batch_size=10,
-                desc="Tokenizing train dataset",
-            )
-            if eval_dataset is not None:
-                eval_dataset = eval_dataset.map(
-                    self.tokenize_row if not self.is_vision_model else self.process_row,
-                    fn_kwargs=fn_kwargs,
-                    num_proc=self.dataset_num_proc,
-                    writer_batch_size=10,
-                    desc="Tokenizing eval dataset",
-                )
+        # Dataset preparation
+        train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+        if eval_dataset is not None:
+            if isinstance(eval_dataset, dict):
+                eval_dataset = {
+                    key: self._prepare_dataset(dataset, processing_class, args, key)
+                    for key, dataset in eval_dataset.items()
+                }
+            else:
+                eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval")
 
         super().__init__(
             model=model,
@@ -540,6 +504,48 @@ def make_inputs_require_grad(module, input, output):
         if self.loss_type == "bco_pair":
             self.running = RunningMoments(self.accelerator)
 
+    def _prepare_dataset(
+        self,
+        dataset: Union[Dataset, IterableDataset],
+        processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin],
+        args: DPOConfig,
+        dataset_name: str,
+    ) -> Union[Dataset, IterableDataset]:
+        # Build the kwargs for the `map` function
+        map_kwargs = {"writer_batch_size": 10}
+        if isinstance(dataset, Dataset):  # IterableDataset does not support num_proc
+            map_kwargs["num_proc"] = args.dataset_num_proc
+
+        with PartialState().local_main_process_first():
+            # Extract prompt if needed
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Extracting prompt in {dataset_name} dataset"
+            dataset = dataset.map(maybe_extract_prompt, **map_kwargs)
+
+            # Apply the chat template if needed
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset"
+            dataset = dataset.map(maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, **map_kwargs)
+
+            # Tokenize the dataset
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset"
+
+            dataset = dataset.map(
+                self.tokenize_row if not self.is_vision_model else self.process_row,
+                remove_columns=["prompt", "chosen", "rejected"],
+                fn_kwargs={
+                    "processing_class": processing_class,
+                    "max_prompt_length": args.max_prompt_length,
+                    "max_completion_length": args.max_completion_length,
+                    # for enc-dec, we add the special tokens ([bos_token] + prompt + [eos_token]; completion + [eos_token])
+                    "add_special_tokens": False,
+                },
+                **map_kwargs,
+            )
+
+        return dataset
+
     @staticmethod
     def tokenize_row(features, processing_class, max_prompt_length, max_completion_length, add_special_tokens):
         """
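# Rough sketch of what the `_prepare_dataset` refactor above appears to enable: the
# same preparation path now accepts both `Dataset` and `IterableDataset`, so a
# streaming preference dataset can be handed to `DPOTrainer` directly. The model and
# dataset names below are placeholders, not taken from the patch.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")  # placeholder model
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train", streaming=True)  # IterableDataset

trainer = DPOTrainer(
    model=model,
    args=DPOConfig(output_dir="dpo-model", max_steps=100),  # max_steps is needed when the dataset has no __len__
    processing_class=tokenizer,
    train_dataset=train_dataset,
)
trainer.train()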
diff --git a/trl/trainer/gkd_config.py b/trl/trainer/gkd_config.py
index e9b9d76363..e110b047d1 100644
--- a/trl/trainer/gkd_config.py
+++ b/trl/trainer/gkd_config.py
@@ -41,7 +41,7 @@ class GKDConfig(SFTConfig):
             Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
             from a string.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether or not to disable dropouts in `model`.
+            Whether to disable dropout in the model.
         seq_kd (`bool`, *optional*, defaults to `False`):
             Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on
             teacher-generated output).
diff --git a/trl/trainer/gkd_trainer.py b/trl/trainer/gkd_trainer.py
index be48f1925b..f212b5a296 100644
--- a/trl/trainer/gkd_trainer.py
+++ b/trl/trainer/gkd_trainer.py
@@ -126,6 +126,7 @@ def __init__(
         else:
             teacher_model = AutoModelForCausalLM.from_pretrained(teacher_model, **teacher_model_init_kwargs)
 
+        # Disable dropout in the model
         if args.disable_dropout:
             disable_dropout_in_model(self.model)
 
diff --git a/trl/trainer/kto_config.py b/trl/trainer/kto_config.py
index 563d0cdbc9..e5feb2dbad 100644
--- a/trl/trainer/kto_config.py
+++ b/trl/trainer/kto_config.py
@@ -77,7 +77,7 @@ class KTOConfig(TrainingArguments):
         dataset_num_proc: (`Optional[int]`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether to disable dropout in the model.
+            Whether to disable dropout in the model and reference model.
     """
 
     learning_rate: float = 1e-6
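# Companion sketch for the GKD hunk above: GKDConfig inherits from SFTConfig, and
# per the code shown the flag is applied to the student (`self.model`). Values are
# placeholders, not taken from the patch.
from trl import GKDConfig

training_args = GKDConfig(
    output_dir="gkd-student",   # hypothetical path
    disable_dropout=True,       # disables dropout in the student model
)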
diff --git a/trl/trainer/kto_trainer.py b/trl/trainer/kto_trainer.py
index d054d97e7d..b19955c145 100644
--- a/trl/trainer/kto_trainer.py
+++ b/trl/trainer/kto_trainer.py
@@ -304,8 +304,6 @@ class KTOTrainer(Trainer):
             The function to use to preprocess the logits before computing the metrics.
         peft_config (`dict`, defaults to `None`):
             The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
-        disable_dropout (`bool`, defaults to `True`):
-            Whether or not to disable dropouts in `model` and `ref_model`.
         compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
             The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string
             to metric values.
@@ -526,6 +524,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
             if self.ref_model is not None:
diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py
index 0b06c79cb5..5e75ede883 100644
--- a/trl/trainer/online_dpo_config.py
+++ b/trl/trainer/online_dpo_config.py
@@ -57,7 +57,7 @@ class OnlineDPOConfig(TrainingArguments):
         dataset_num_proc (`Optional[int]`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether to disable dropout in the model.
+            Whether to disable dropout in the model and reference model.
     """
 
     learning_rate: float = 5e-7
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index 68008881f5..ebab5cdcfc 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -196,9 +196,11 @@ def __init__(
             # Get peft model with the given config
             model = get_peft_model(model, peft_config)
 
-        # Disable dropout in the model if specified
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
+            if self.ref_model is not None:
+                disable_dropout_in_model(self.ref_model)
 
         # Handle the ref_model
         # Usually, the user wants the ref model to be the initial version of the model. When using PEFT, it's easy to
diff --git a/trl/trainer/orpo_trainer.py b/trl/trainer/orpo_trainer.py
index 50392526db..65d80802be 100644
--- a/trl/trainer/orpo_trainer.py
+++ b/trl/trainer/orpo_trainer.py
@@ -282,6 +282,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
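# A quick way to check the effect of the online-DPO change above (dropout zeroed in
# both the policy and the reference model) is to inspect the modules after the
# trainer is built. `trainer` stands for an already-constructed OnlineDPOTrainer and
# is a hypothetical name, not taken from the patch.
import torch.nn as nn

def dropout_probs(model: nn.Module) -> list[float]:
    return [m.p for m in model.modules() if isinstance(m, nn.Dropout)]

# Hypothetical check: with disable_dropout=True every entry should be 0.
# assert all(p == 0 for p in dropout_probs(trainer.model))
# assert trainer.ref_model is None or all(p == 0 for p in dropout_probs(trainer.ref_model))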
diff --git a/trl/trainer/prm_config.py b/trl/trainer/prm_config.py
index 4558084572..21a4fc5662 100644
--- a/trl/trainer/prm_config.py
+++ b/trl/trainer/prm_config.py
@@ -35,6 +35,8 @@ class PRMConfig(TrainingArguments):
             Maximum length of the sequences (prompt + completion) used for truncation.
         max_completion_length (`Optional[int]`, *optional*, defaults to `None`):
             Maximum length of the completion used for truncation. The completion is the concatenation of the steps.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model.
         step_separator (`str`, *optional*, defaults to `"\n"`):
             Separator used to separate each step of the reasoning process.
         train_on_last_step_only (`bool`, *optional*, defaults to `False`):
@@ -46,6 +48,7 @@
     learning_rate: float = 1e-5
     max_length: Optional[int] = None
     max_completion_length: Optional[int] = None
+    disable_dropout: bool = True
     step_separator: str = "\n"
     train_on_last_step_only: bool = False
     dataset_num_proc: Optional[int] = None
diff --git a/trl/trainer/prm_trainer.py b/trl/trainer/prm_trainer.py
index dbb3558d57..47d73ce19c 100644
--- a/trl/trainer/prm_trainer.py
+++ b/trl/trainer/prm_trainer.py
@@ -39,7 +39,7 @@
 from transformers.utils import is_peft_available
 
 from .prm_config import PRMConfig
-from .utils import compute_accuracy, generate_model_card
+from .utils import compute_accuracy, disable_dropout_in_model, generate_model_card
 
 
 if is_peft_available():
@@ -130,6 +130,10 @@ def __init__(
             model = get_peft_model(model, peft_config)
 
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+
         if compute_metrics is None:
             compute_metrics = compute_accuracy
diff --git a/trl/trainer/reward_config.py b/trl/trainer/reward_config.py
index 6e3eeab372..8018a2844c 100644
--- a/trl/trainer/reward_config.py
+++ b/trl/trainer/reward_config.py
@@ -31,6 +31,8 @@ class RewardConfig(TrainingArguments):
         max_length (`Optional[int]`, *optional*, defaults to `None`):
             Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
             to use the default data collator.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model.
         dataset_num_proc (`int`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         center_rewards_coefficient (`float`, *optional*, defaults to `None`):
@@ -42,6 +44,7 @@
     """
 
     max_length: Optional[int] = None
+    disable_dropout: bool = True
     dataset_num_proc: Optional[int] = None
     center_rewards_coefficient: Optional[float] = None
     remove_unused_columns: bool = False
diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 109d8a47cf..79b237b9e7 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -47,6 +47,7 @@
     RewardDataCollatorWithPadding,
     compute_accuracy,
     decode_and_strip_padding,
+    disable_dropout_in_model,
     generate_model_card,
     get_comet_experiment_url,
     log_table_to_comet_experiment,
@@ -169,6 +170,10 @@ def __init__(
             model = get_peft_model(model, peft_config)
 
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+
         if compute_metrics is None:
             compute_metrics = compute_accuracy
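# Closing sketch for the remaining configs touched above. Assuming RewardConfig and
# PRMConfig are both importable from the package root, the new switch is used the
# same way as in the other trainers; values below are placeholders.
from trl import PRMConfig, RewardConfig

reward_args = RewardConfig(
    output_dir="reward-model",   # hypothetical path
    max_length=1024,
    disable_dropout=True,        # default
)
prm_args = PRMConfig(
    output_dir="prm-model",      # hypothetical path
    disable_dropout=False,       # opt out: keep dropout active during training
)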