diff --git a/docs/source/community_tutorials.md b/docs/source/community_tutorials.md
index 50450eb563..4b2b9a6e54 100644
--- a/docs/source/community_tutorials.md
+++ b/docs/source/community_tutorials.md
@@ -10,6 +10,7 @@ Community tutorials are made by active members of the Hugging Face community tha
 | Structured Generation | [`SFTTrainer`] | Fine-tuning Llama-2-7B to generate Persian product catalogs in JSON using QLoRA and PEFT | [Mohammadreza Esmaeilian](https://huggingface.co/Mohammadreza) | [Link](https://huggingface.co/learn/cookbook/en/fine_tuning_llm_to_generate_persian_product_catalogs_in_json_format) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/cookbook/blob/main/notebooks/en/fine_tuning_llm_to_generate_persian_product_catalogs_in_json_format.ipynb) |
 | Preference Optimization | [`DPOTrainer`] | Align Mistral-7b using Direct Preference Optimization for human preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/Fine_tune_Mistral_7b_with_DPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_a_Mistral_7b_model_with_DPO.ipynb) |
 | Preference Optimization | [`ORPOTrainer`] | Fine-tuning Llama 3 with ORPO combining instruction tuning and preference alignment | [Maxime Labonne](https://huggingface.co/mlabonne) | [Link](https://mlabonne.github.io/blog/posts/2024-04-19_Fine_tune_Llama_3_with_ORPO.html) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eHNWg9gnaXErdAa8_mcvjMupbSS6rDvi) |
+| Instruction tuning | [`SFTTrainer`] | How to fine-tune open LLMs in 2025 with Hugging Face | [Philipp Schmid](https://huggingface.co/philschmid) | [Link](https://www.philschmid.de/fine-tune-llms-in-2025) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/philschmid/deep-learning-pytorch-huggingface/blob/main/training/fine-tune-llms-in-2025.ipynb) |
@@ -25,4 +26,4 @@ Community tutorials are made by active members of the Hugging Face community tha
 
 ## Contributing
 
-If you have a tutorial that you would like to add to this list, please open a PR to add it. We will review it and merge it if it is relevant to the community.
\ No newline at end of file
+If you have a tutorial that you would like to add to this list, please open a PR to add it. We will review it and merge it if it is relevant to the community.
diff --git a/tests/test_rloo_trainer.py b/tests/test_rloo_trainer.py
index 6861b6e83e..2672d37f9c 100644
--- a/tests/test_rloo_trainer.py
+++ b/tests/test_rloo_trainer.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import platform
-import subprocess
 import tempfile
 import unittest
 
@@ -24,34 +22,6 @@
 from trl import RLOOConfig, RLOOTrainer
 
 
-def test():
-    command = """\
-python examples/scripts/rloo/rloo.py \
-    --dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
-    --dataset_train_split descriptiveness \
-    --learning_rate 3e-6 \
-    --output_dir models/minimal/rloo \
-    --per_device_train_batch_size 4 \
-    --gradient_accumulation_steps 1 \
-    --total_episodes 10 \
-    --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
-    --missing_eos_penalty 1.0 \
-    --save_strategy no \
-    --stop_token eos
-"""
-    if platform.system() == "Windows":
-        # windows CI does not work with subprocesses for some reason
-        # e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743
-        return
-    subprocess.run(
-        command,
-        shell=True,
-        check=True,
-    )
-
-
 class RLOOTrainerTester(unittest.TestCase):
     def setUp(self):
         self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
diff --git a/trl/trainer/bco_config.py b/trl/trainer/bco_config.py
index 10cd82b9f5..b3398ae914 100644
--- a/trl/trainer/bco_config.py
+++ b/trl/trainer/bco_config.py
@@ -46,6 +46,8 @@ class BCOConfig(TrainingArguments):
         truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
             Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
             This argument is required if you want to use the default data collator.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model and reference model.
         generate_during_eval (`bool`, *optional*, defaults to `False`):
             If `True`, generates and logs completions from both the model and the reference model to W&B during
             evaluation.
@@ -78,6 +80,7 @@
     label_pad_token_id: int = -100
     padding_value: Optional[int] = None
     truncation_mode: str = "keep_end"
+    disable_dropout: bool = True
     generate_during_eval: bool = False
    is_encoder_decoder: Optional[bool] = None
    precompute_ref_log_probs: bool = False
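# Usage sketch for the `disable_dropout` option introduced in BCOConfig above.
# This snippet is illustrative and not part of the patch; the output path is a
# placeholder.
from trl import BCOConfig

training_args = BCOConfig(
    output_dir="bco-model",   # hypothetical path
    disable_dropout=True,     # default: dropout is zeroed in both the policy and the reference model
)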
diff --git a/trl/trainer/bco_trainer.py b/trl/trainer/bco_trainer.py
index c2d58ab3f2..1c26516793 100644
--- a/trl/trainer/bco_trainer.py
+++ b/trl/trainer/bco_trainer.py
@@ -309,8 +309,6 @@ class BCOTrainer(Trainer):
             The function to use to preprocess the logits before computing the metrics.
         peft_config (`dict`, defaults to `None`):
             The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
-        disable_dropout (`bool`, defaults to `True`):
-            Whether or not to disable dropouts in `model` and `ref_model`.
         compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
             The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string
             to metric values.
@@ -538,10 +536,11 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
-        # disable dropout in the model and reference model
-        disable_dropout_in_model(model)
-        if self.ref_model is not None:
-            disable_dropout_in_model(self.ref_model)
+        # Disable dropout in the model and reference model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+            if self.ref_model is not None:
+                disable_dropout_in_model(self.ref_model)
 
         self.max_length = max_length
         self.generate_during_eval = args.generate_during_eval
diff --git a/trl/trainer/cpo_trainer.py b/trl/trainer/cpo_trainer.py
index 6d236cfb37..4720517b6a 100644
--- a/trl/trainer/cpo_trainer.py
+++ b/trl/trainer/cpo_trainer.py
@@ -268,6 +268,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model
         if args.disable_dropout:
             disable_dropout_in_model(model)
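# For context on the `disable_dropout_in_model` helper gated above: it behaves
# roughly like the sketch below, walking the module tree and zeroing every dropout
# probability. This is an illustrative paraphrase, not a verbatim copy of the
# helper in trl.trainer.utils.
import torch.nn as nn

def disable_dropout_sketch(model: nn.Module) -> None:
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.p = 0.0  # makes the layer a no-op during training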
diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index 3c4c7771b2..d820857de1 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -30,7 +30,7 @@
 import transformers
 from accelerate import PartialState
 from accelerate.utils import is_deepspeed_available, tqdm
-from datasets import Dataset
+from datasets import Dataset, IterableDataset
 from packaging import version
 from torch.utils.data import DataLoader
 from transformers import (
@@ -376,6 +376,7 @@ def make_inputs_require_grad(module, input, output):
         if data_collator is None:
             data_collator = PreferenceCollator(pad_token_id=self.padding_value)
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
             if self.ref_model is not None:
@@ -436,53 +437,16 @@
         # that the warning has already been issued.
         model.warnings_issued["estimate_tokens"] = True
 
-        # Compute that only on the main process for faster data processing.
-        # see: https://github.com/huggingface/trl/pull/1255
-        with PartialState().local_main_process_first():
-            # Extract the prompt if needed, and apply the chat template if needed
-            train_dataset = train_dataset.map(
-                maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from train dataset"
-            )
-            train_dataset = train_dataset.map(
-                maybe_apply_chat_template,
-                fn_kwargs={"tokenizer": processing_class},
-                num_proc=args.dataset_num_proc,
-                desc="Applying chat template to train dataset",
-            )
-            if eval_dataset is not None:
-                eval_dataset = eval_dataset.map(
-                    maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from eval dataset"
-                )
-                eval_dataset = eval_dataset.map(
-                    maybe_apply_chat_template,
-                    fn_kwargs={"tokenizer": processing_class},
-                    num_proc=args.dataset_num_proc,
-                    desc="Applying chat template to eval dataset",
-                )
-
-            # tokenize the dataset, lower writer batch size to avoid OOM (frequent in vision models)
-            fn_kwargs = {
-                "processing_class": processing_class,
-                "max_prompt_length": args.max_prompt_length,
-                "max_completion_length": args.max_completion_length,
-                # for enc-dec, we add the special tokens ([bos_token] + prompt + [eos_token]; completion + [eos_token])
-                "add_special_tokens": self.is_encoder_decoder,
-            }
-            train_dataset = train_dataset.map(
-                self.tokenize_row if not self.is_vision_model else self.process_row,
-                fn_kwargs=fn_kwargs,
-                num_proc=self.dataset_num_proc,
-                writer_batch_size=10,
-                desc="Tokenizing train dataset",
-            )
-            if eval_dataset is not None:
-                eval_dataset = eval_dataset.map(
-                    self.tokenize_row if not self.is_vision_model else self.process_row,
-                    fn_kwargs=fn_kwargs,
-                    num_proc=self.dataset_num_proc,
-                    writer_batch_size=10,
-                    desc="Tokenizing eval dataset",
-                )
+        # Dataset preparation
+        train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train")
+        if eval_dataset is not None:
+            if isinstance(eval_dataset, dict):
+                eval_dataset = {
+                    key: self._prepare_dataset(dataset, processing_class, args, key)
+                    for key, dataset in eval_dataset.items()
+                }
+            else:
+                eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval")
 
         super().__init__(
             model=model,
@@ -540,6 +504,48 @@ def make_inputs_require_grad(module, input, output):
         if self.loss_type == "bco_pair":
             self.running = RunningMoments(self.accelerator)
 
+    def _prepare_dataset(
+        self,
+        dataset: Union[Dataset, IterableDataset],
+        processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin],
+        args: DPOConfig,
+        dataset_name: str,
+    ) -> Union[Dataset, IterableDataset]:
+        # Build the kwargs for the `map` function
+        map_kwargs = {"writer_batch_size": 10}
+        if isinstance(dataset, Dataset):  # IterableDataset does not support num_proc
+            map_kwargs["num_proc"] = args.dataset_num_proc
+
+        with PartialState().local_main_process_first():
+            # Extract prompt if needed
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Extracting prompt in {dataset_name} dataset"
+            dataset = dataset.map(maybe_extract_prompt, **map_kwargs)
+
+            # Apply the chat template if needed
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset"
+            dataset = dataset.map(maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, **map_kwargs)
+
+            # Tokenize the dataset
+            if isinstance(dataset, Dataset):  # `IterableDataset.map` does not support `desc`
+                map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset"
+
+            dataset = dataset.map(
+                self.tokenize_row if not self.is_vision_model else self.process_row,
+                remove_columns=["prompt", "chosen", "rejected"],
+                fn_kwargs={
+                    "processing_class": processing_class,
+                    "max_prompt_length": args.max_prompt_length,
+                    "max_completion_length": args.max_completion_length,
+                    # for enc-dec, we add the special tokens ([bos_token] + prompt + [eos_token]; completion + [eos_token])
+                    "add_special_tokens": False,
+                },
+                **map_kwargs,
+            )
+
+        return dataset
+
     @staticmethod
     def tokenize_row(features, processing_class, max_prompt_length, max_completion_length, add_special_tokens):
         """
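# Rough sketch of what the `_prepare_dataset` refactor above appears to enable: the
# same preparation path now accepts both `Dataset` and `IterableDataset`, so a
# streaming preference dataset can be handed to `DPOTrainer` directly. The model and
# dataset names below are placeholders, not taken from the patch.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")  # placeholder model
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train", streaming=True)  # IterableDataset

trainer = DPOTrainer(
    model=model,
    args=DPOConfig(output_dir="dpo-model", max_steps=100),  # max_steps is needed when the dataset has no __len__
    processing_class=tokenizer,
    train_dataset=train_dataset,
)
trainer.train()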
diff --git a/trl/trainer/gkd_config.py b/trl/trainer/gkd_config.py
index e9b9d76363..e110b047d1 100644
--- a/trl/trainer/gkd_config.py
+++ b/trl/trainer/gkd_config.py
@@ -41,7 +41,7 @@ class GKDConfig(SFTConfig):
             Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
             from a string.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether or not to disable dropouts in `model`.
+            Whether to disable dropout in the model.
         seq_kd (`bool`, *optional*, defaults to `False`):
             Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on
             teacher-generated output).
diff --git a/trl/trainer/gkd_trainer.py b/trl/trainer/gkd_trainer.py
index be48f1925b..f212b5a296 100644
--- a/trl/trainer/gkd_trainer.py
+++ b/trl/trainer/gkd_trainer.py
@@ -126,6 +126,7 @@ def __init__(
         else:
             teacher_model = AutoModelForCausalLM.from_pretrained(teacher_model, **teacher_model_init_kwargs)
 
+        # Disable dropout in the model
         if args.disable_dropout:
             disable_dropout_in_model(self.model)
 
diff --git a/trl/trainer/kto_config.py b/trl/trainer/kto_config.py
index 563d0cdbc9..e5feb2dbad 100644
--- a/trl/trainer/kto_config.py
+++ b/trl/trainer/kto_config.py
@@ -77,7 +77,7 @@ class KTOConfig(TrainingArguments):
         dataset_num_proc: (`Optional[int]`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether to disable dropout in the model.
+            Whether to disable dropout in the model and reference model.
     """
 
     learning_rate: float = 1e-6
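# Companion sketch for the GKD hunk above: GKDConfig inherits from SFTConfig, and
# per the code shown the flag is applied to the student (`self.model`). Values are
# placeholders, not taken from the patch.
from trl import GKDConfig

training_args = GKDConfig(
    output_dir="gkd-student",   # hypothetical path
    disable_dropout=True,       # disables dropout in the student model
)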
diff --git a/trl/trainer/kto_trainer.py b/trl/trainer/kto_trainer.py
index d054d97e7d..b19955c145 100644
--- a/trl/trainer/kto_trainer.py
+++ b/trl/trainer/kto_trainer.py
@@ -304,8 +304,6 @@ class KTOTrainer(Trainer):
             The function to use to preprocess the logits before computing the metrics.
         peft_config (`dict`, defaults to `None`):
             The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in a PEFT model.
-        disable_dropout (`bool`, defaults to `True`):
-            Whether or not to disable dropouts in `model` and `ref_model`.
         compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
             The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string
             to metric values.
@@ -526,6 +524,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
             if self.ref_model is not None:
diff --git a/trl/trainer/online_dpo_config.py b/trl/trainer/online_dpo_config.py
index 0b06c79cb5..5e75ede883 100644
--- a/trl/trainer/online_dpo_config.py
+++ b/trl/trainer/online_dpo_config.py
@@ -57,7 +57,7 @@ class OnlineDPOConfig(TrainingArguments):
         dataset_num_proc (`Optional[int]`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         disable_dropout (`bool`, *optional*, defaults to `True`):
-            Whether to disable dropout in the model.
+            Whether to disable dropout in the model and reference model.
     """
 
     learning_rate: float = 5e-7
diff --git a/trl/trainer/online_dpo_trainer.py b/trl/trainer/online_dpo_trainer.py
index 68008881f5..ebab5cdcfc 100644
--- a/trl/trainer/online_dpo_trainer.py
+++ b/trl/trainer/online_dpo_trainer.py
@@ -196,9 +196,11 @@ def __init__(
             # Get peft model with the given config
             model = get_peft_model(model, peft_config)
 
-        # Disable dropout in the model if specified
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
+            if self.ref_model is not None:
+                disable_dropout_in_model(self.ref_model)
 
         # Handle the ref_model
         # Usually, the user wants the ref model to be the initial version of the model. When using PEFT, it's easy to
diff --git a/trl/trainer/orpo_trainer.py b/trl/trainer/orpo_trainer.py
index 50392526db..65d80802be 100644
--- a/trl/trainer/orpo_trainer.py
+++ b/trl/trainer/orpo_trainer.py
@@ -282,6 +282,7 @@ def make_inputs_require_grad(module, input, output):
         else:
             self.use_dpo_data_collator = False
 
+        # Disable dropout in the model and reference model
         if args.disable_dropout:
             disable_dropout_in_model(model)
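# A quick way to check the effect of the online-DPO change above (dropout zeroed in
# both the policy and the reference model) is to inspect the modules after the
# trainer is built. `trainer` stands for an already-constructed OnlineDPOTrainer and
# is a hypothetical name, not taken from the patch.
import torch.nn as nn

def dropout_probs(model: nn.Module) -> list[float]:
    return [m.p for m in model.modules() if isinstance(m, nn.Dropout)]

# Hypothetical check: with disable_dropout=True every entry should be 0.
# assert all(p == 0 for p in dropout_probs(trainer.model))
# assert trainer.ref_model is None or all(p == 0 for p in dropout_probs(trainer.ref_model))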
diff --git a/trl/trainer/prm_config.py b/trl/trainer/prm_config.py
index 4558084572..21a4fc5662 100644
--- a/trl/trainer/prm_config.py
+++ b/trl/trainer/prm_config.py
@@ -35,6 +35,8 @@ class PRMConfig(TrainingArguments):
             Maximum length of the sequences (prompt + completion) used for truncation.
         max_completion_length (`Optional[int]`, *optional*, defaults to `None`):
             Maximum length of the completion used for truncation. The completion is the concatenation of the steps.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model.
         step_separator (`str`, *optional*, defaults to `"\n"`):
             Separator used to separate each step of the reasoning process.
         train_on_last_step_only (`bool`, *optional*, defaults to `False`):
@@ -46,6 +48,7 @@
     learning_rate: float = 1e-5
     max_length: Optional[int] = None
     max_completion_length: Optional[int] = None
+    disable_dropout: bool = True
     step_separator: str = "\n"
     train_on_last_step_only: bool = False
     dataset_num_proc: Optional[int] = None
diff --git a/trl/trainer/prm_trainer.py b/trl/trainer/prm_trainer.py
index dbb3558d57..47d73ce19c 100644
--- a/trl/trainer/prm_trainer.py
+++ b/trl/trainer/prm_trainer.py
@@ -39,7 +39,7 @@
 from transformers.utils import is_peft_available
 
 from .prm_config import PRMConfig
-from .utils import compute_accuracy, generate_model_card
+from .utils import compute_accuracy, disable_dropout_in_model, generate_model_card
 
 
 if is_peft_available():
@@ -130,6 +130,10 @@ def __init__(
             model = get_peft_model(model, peft_config)
 
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+
         if compute_metrics is None:
             compute_metrics = compute_accuracy
diff --git a/trl/trainer/reward_config.py b/trl/trainer/reward_config.py
index 6e3eeab372..8018a2844c 100644
--- a/trl/trainer/reward_config.py
+++ b/trl/trainer/reward_config.py
@@ -31,6 +31,8 @@ class RewardConfig(TrainingArguments):
         max_length (`Optional[int]`, *optional*, defaults to `None`):
             Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
             to use the default data collator.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model.
         dataset_num_proc (`int`, *optional*, defaults to `None`):
             Number of processes to use for processing the dataset.
         center_rewards_coefficient (`float`, *optional*, defaults to `None`):
@@ -42,6 +44,7 @@
     """
 
     max_length: Optional[int] = None
+    disable_dropout: bool = True
     dataset_num_proc: Optional[int] = None
     center_rewards_coefficient: Optional[float] = None
     remove_unused_columns: bool = False
diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
index 109d8a47cf..79b237b9e7 100644
--- a/trl/trainer/reward_trainer.py
+++ b/trl/trainer/reward_trainer.py
@@ -47,6 +47,7 @@
     RewardDataCollatorWithPadding,
     compute_accuracy,
     decode_and_strip_padding,
+    disable_dropout_in_model,
     generate_model_card,
     get_comet_experiment_url,
     log_table_to_comet_experiment,
@@ -169,6 +170,10 @@ def __init__(
             model = get_peft_model(model, peft_config)
 
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+
         if compute_metrics is None:
             compute_metrics = compute_accuracy
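# Closing sketch for the remaining configs touched above. Assuming RewardConfig and
# PRMConfig are both importable from the package root, the new switch is used the
# same way as in the other trainers; values below are placeholders.
from trl import PRMConfig, RewardConfig

reward_args = RewardConfig(
    output_dir="reward-model",   # hypothetical path
    max_length=1024,
    disable_dropout=True,        # default
)
prm_args = PRMConfig(
    output_dir="prm-model",      # hypothetical path
    disable_dropout=False,       # opt out: keep dropout active during training
)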