Merge branch 'main' into field
qgallouedec authored Jan 6, 2025
2 parents 2ea9cb9 + d9ee2fd commit b0b6f51
Showing 30 changed files with 439 additions and 201 deletions.
4 changes: 2 additions & 2 deletions docs/source/dpo_trainer.mdx
@@ -278,6 +278,6 @@ dpo_trainer = DPOTrainer(

[[autodoc]] DPOConfig

## PreferenceCollator
## DataCollatorForPreference

[[autodoc]] trainer.dpo_trainer.PreferenceCollator
[[autodoc]] trainer.dpo_trainer.DataCollatorForPreference
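
A minimal usage sketch of the renamed collator, grounded in the tests added in this commit (`tests/test_collators.py`); the example inputs and the expected padding behavior are taken directly from those tests:

```python
from trl.trainer.dpo_trainer import DataCollatorForPreference

collator = DataCollatorForPreference(pad_token_id=0)
examples = [
    {"prompt_input_ids": [1, 2, 3], "chosen_input_ids": [4, 5], "rejected_input_ids": [6]},
    {"prompt_input_ids": [7, 8], "chosen_input_ids": [9, 10], "rejected_input_ids": [11, 12, 13]},
]
batch = collator.torch_call(examples)

# Prompts are left-padded, completions are right-padded
print(batch["prompt_input_ids"].tolist())    # [[1, 2, 3], [0, 7, 8]]
print(batch["rejected_input_ids"].tolist())  # [[6, 0, 0], [11, 12, 13]]
```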
35 changes: 34 additions & 1 deletion docs/source/reducing_memory_usage.md
@@ -51,4 +51,37 @@ training_args = SFTConfig(..., max_length=...)
```

</hfoption>
</hfoptions>

## Packing

<Tip>

This technique applies only to SFT.

</Tip>


[Truncation](#truncation) has several drawbacks:
1. **Loss of information**: Key data at the end of a sequence may be discarded.
2. **Choosing truncation length**: Too short loses data; too long undermines efficiency.

Packing, introduced in [Raffel et al., 2020](https://huggingface.co/papers/1910.10683), addresses these issues by grouping sequences instead of truncating them: dataset sequences are concatenated and then split into chunks of the desired length.

<div class="flex justify-center">
<img src="https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/packing.png" alt="Packing" width="600"/>
</div>
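
To make the transform concrete, here is a minimal sketch of packing (illustrative only, not TRL's internal implementation, which operates on tokenized datasets):

```python
# Concatenate tokenized sequences into one stream, then split the stream
# into chunks of the desired length; the last chunk may be shorter.
def pack(sequences, chunk_length):
    stream = [token for seq in sequences for token in seq]
    return [stream[i : i + chunk_length] for i in range(0, len(stream), chunk_length)]

print(pack([[1, 2, 3], [4, 5], [6, 7, 8, 9]], chunk_length=4))
# [[1, 2, 3, 4], [5, 6, 7, 8], [9]]
```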

Packing eliminates padding, preserves all sequence information, and allows for flexible sequence lengths, making it a more efficient alternative to truncation. To enable packing, use `packing=True` in the [`SFTConfig`]:

```python
from trl import SFTConfig

training_args = SFTConfig(..., packing=True, max_seq_length=512)
```

<Tip warning={true}>

Packing may cause batch contamination, where adjacent sequences influence one another. This can be problematic for some applications. For more details, see [#1230](https://github.com/huggingface/trl/issues/1230).

</Tip>
4 changes: 2 additions & 2 deletions tests/test_bco_trainer.py
@@ -346,8 +346,8 @@ def test_bco_trainer_generate_during_eval_no_wandb(self):

with self.assertRaisesRegex(
ValueError,
expected_regex="`generate_during_eval=True` requires Weights and Biases to be installed."
" Please install with `pip install wandb` to resolve.",
expected_regex="`generate_during_eval=True` requires Weights and Biases or Comet to be installed."
" Please install `wandb` or `comet-ml` to resolve.",
):
BCOTrainer(
model=self.model,
45 changes: 42 additions & 3 deletions tests/test_callbacks.py
@@ -24,7 +24,7 @@
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import is_peft_available

from tests.testing_utils import require_mergekit
from tests.testing_utils import require_comet, require_mergekit
from trl import BasePairwiseJudge, DPOConfig, DPOTrainer, LogCompletionsCallback, MergeModelCallback, WinRateCallback
from trl.mergekit_utils import MergeConfig

@@ -216,7 +216,6 @@ def test_lora(self):
self.assertListEqual(winrate_history, self.expected_winrates)


@require_wandb
class LogCompletionsCallbackTester(unittest.TestCase):
def setUp(self):
self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
Expand All @@ -234,7 +233,8 @@ def tokenize_function(examples):

self.generation_config = GenerationConfig(max_length=32)

def test_basic(self):
@require_wandb
def test_basic_wandb(self):
import wandb

with tempfile.TemporaryDirectory() as tmp_dir:
@@ -271,6 +271,45 @@ def test_basic(self):
# Check that the prompt is in the log
self.assertIn(self.dataset["test"][0]["prompt"], completions["data"][0])

@require_comet
def test_basic_comet(self):
import comet_ml

with tempfile.TemporaryDirectory() as tmp_dir:
training_args = TrainingArguments(
output_dir=tmp_dir,
eval_strategy="steps",
eval_steps=2, # evaluate every 2 steps
per_device_train_batch_size=2, # 8 samples in total so 4 batches of 2 per epoch
per_device_eval_batch_size=2,
report_to="comet_ml",
)
trainer = Trainer(
model=self.model,
args=training_args,
train_dataset=self.dataset["train"],
eval_dataset=self.dataset["test"],
processing_class=self.tokenizer,
)
completions_callback = LogCompletionsCallback(trainer, self.generation_config, num_prompts=2)
trainer.add_callback(completions_callback)
trainer.train()

# close the experiment to make sure all pending data is flushed
experiment = comet_ml.get_running_experiment()
assert experiment is not None
experiment.end()

# get the experiment assets and check that all required tables were logged
steps = len(self.dataset["train"]) + len(self.dataset["test"])
tables_logged = int(steps / 2) + 1 # +1 to include zero step

api_experiment = comet_ml.APIExperiment(previous_experiment=experiment.id)
tables = api_experiment.get_asset_list("dataframe")
assert tables is not None
assert len(tables) == tables_logged
assert all(table["fileName"] == "completions.csv" for table in tables)


# On Windows, temporary directory cleanup fails when using the MergeModelCallback.
# This is not an issue with the functionality of the code itself, but it can cause the test to fail
74 changes: 74 additions & 0 deletions tests/test_collators.py
@@ -0,0 +1,74 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import torch

from trl.trainer.dpo_trainer import DataCollatorForPreference


class TestDataCollatorForPreference(unittest.TestCase):
def setUp(self):
self.collator = DataCollatorForPreference(pad_token_id=0)

def assertTensorEqual(self, tensor1, tensor2):
self.assertTrue(torch.equal(tensor1, tensor2), f"Tensors are not equal:\n{tensor1}\n{tensor2}")

def test_padding_behavior(self):
examples = [
{"prompt_input_ids": [1, 2, 3], "chosen_input_ids": [4, 5], "rejected_input_ids": [6]},
{"prompt_input_ids": [7, 8], "chosen_input_ids": [9, 10], "rejected_input_ids": [11, 12, 13]},
]
output = self.collator.torch_call(examples)
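# The collator left-pads prompts and right-pads completions, as the expected tensors below show.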

expected_prompt_input_ids = torch.tensor([[1, 2, 3], [0, 7, 8]])
expected_prompt_attention_mask = torch.tensor([[1, 1, 1], [0, 1, 1]])
expected_chosen_input_ids = torch.tensor([[4, 5], [9, 10]])
expected_chosen_attention_mask = torch.tensor([[1, 1], [1, 1]])
expected_rejected_input_ids = torch.tensor([[6, 0, 0], [11, 12, 13]])
expected_rejected_attention_mask = torch.tensor([[1, 0, 0], [1, 1, 1]])

self.assertTensorEqual(output["prompt_input_ids"], expected_prompt_input_ids)
self.assertTensorEqual(output["prompt_attention_mask"], expected_prompt_attention_mask)
self.assertTensorEqual(output["chosen_input_ids"], expected_chosen_input_ids)
self.assertTensorEqual(output["chosen_attention_mask"], expected_chosen_attention_mask)
self.assertTensorEqual(output["rejected_input_ids"], expected_rejected_input_ids)
self.assertTensorEqual(output["rejected_attention_mask"], expected_rejected_attention_mask)

def test_optional_fields(self):
examples = [
{
"prompt_input_ids": [1],
"chosen_input_ids": [2],
"rejected_input_ids": [3],
"pixel_values": [[[0.1, 0.2], [0.3, 0.4]]], # Example 3D tensor (1x2x2)
},
{
"prompt_input_ids": [4],
"chosen_input_ids": [5],
"rejected_input_ids": [6],
"pixel_values": [[[0.5, 0.6], [0.7, 0.8]]], # Example 3D tensor (1x2x2)
},
]
output = self.collator.torch_call(examples)

expected_pixel_values = torch.tensor(
[
[[[0.1, 0.2], [0.3, 0.4]]],
[[[0.5, 0.6], [0.7, 0.8]]],
]
) # Shape: (2, 1, 2, 2)

self.assertTensorEqual(output["pixel_values"], expected_pixel_values)
20 changes: 19 additions & 1 deletion tests/test_data_collator_completion_only.py
@@ -114,7 +114,7 @@ def test_padding_free(self):
inst1 = "### System: You are a helpful assistant.\n\n### User: How much is 2+2?\n\n### Assistant: 2+2 equals 4"
inst2 = "### System: You are a honest and helpful assistant.\n\n### User: What is the answer of 22x22?\n\n### Assistant: 22x22 equals 484"

response_template = "\n### Assistant:"
response_template = "\n\n### Assistant:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)
collator_paddingfree = DataCollatorForCompletionOnlyLM(
response_template, tokenizer=tokenizer, padding_free=True
@@ -143,3 +143,21 @@ def test_padding_free(self):
self.assertTrue((input_ids_remove_pad == batch_paddingfree["input_ids"]).all())
self.assertTrue((expected_position_ids == batch_paddingfree["position_ids"]).all())
self.assertTrue((expected_labels == batch_paddingfree["labels"]).all())

def test_data_collator_for_completion_only_lm(self):
# The tokenizer isn't used, but the collator requires it to be provided.
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")

collator = DataCollatorForCompletionOnlyLM(tokenizer.decode(9999), tokenizer=tokenizer, padding_free=True)

tokenized_instruction = [
{"input_ids": [1, 2, 3, 9999, 4, 5], "attention_mask": [1, 1, 1, 1, 1, 1]},
{"input_ids": [6, 7, 8, 9, 9999, 10, 11], "attention_mask": [1, 1, 1, 1, 1, 1, 1]},
]
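# With padding_free=True, both sequences are flattened into a single row; their boundaries
# are recovered from position_ids and the cumulative sequence lengths asserted below.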
batch = collator(tokenized_instruction)

self.assertEqual(batch["position_ids"].tolist(), [[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 6]]) # flat pos ids
self.assertEqual(batch["cu_seq_lens_q"].tolist(), [0, 6, 13]) # start index of each sequence, then the total number of tokens
self.assertEqual(batch["cu_seq_lens_k"].tolist(), [0, 6, 13]) # idem
self.assertEqual(batch["max_length_k"], 7) # max length in batch, here 7 (second sequence)
self.assertEqual(batch["max_length_q"], 7) # idem
4 changes: 2 additions & 2 deletions tests/test_dpo_trainer.py
@@ -571,8 +571,8 @@ def test_dpo_trainer_generate_during_eval_no_wandb(self):

with self.assertRaisesRegex(
ValueError,
expected_regex="`generate_during_eval=True` requires Weights and Biases to be installed."
" Please install `wandb` to resolve.",
expected_regex="`generate_during_eval=True` requires Weights and Biases or Comet to be installed."
" Please install `wandb` or `comet-ml` to resolve.",
):
DPOTrainer(
model=self.model,
4 changes: 2 additions & 2 deletions tests/test_kto_trainer.py
@@ -316,8 +316,8 @@ def test_kto_trainer_generate_during_eval_no_wandb(self):

with self.assertRaisesRegex(
ValueError,
expected_regex="`generate_during_eval=True` requires Weights and Biases to be installed."
" Please install with `pip install wandb` to resolve.",
expected_regex="`generate_during_eval=True` requires Weights and Biases or Comet to be installed."
" Please install `wandb` or `comet-ml` to resolve.",
):
KTOTrainer(
model=self.model,
30 changes: 0 additions & 30 deletions tests/test_rloo_trainer.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import platform
import subprocess
import tempfile
import unittest

@@ -24,34 +22,6 @@
from trl import RLOOConfig, RLOOTrainer


def test():
command = """\
python examples/scripts/rloo/rloo.py \
--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
--dataset_train_split descriptiveness \
--learning_rate 3e-6 \
--output_dir models/minimal/rloo \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 1 \
--total_episodes 10 \
--model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--missing_eos_penalty 1.0 \
--save_strategy no \
--stop_token eos
"""
if platform.system() == "Windows":
# windows CI does not work with subprocesses for some reason
# e.g., https://github.com/huggingface/trl/actions/runs/9600036224/job/26475286210?pr=1743
return
subprocess.run(
command,
shell=True,
check=True,
)


class RLOOTrainerTester(unittest.TestCase):
def setUp(self):
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
9 changes: 8 additions & 1 deletion tests/testing_utils.py
@@ -15,7 +15,7 @@
import random
import unittest

from transformers import is_bitsandbytes_available, is_sklearn_available, is_wandb_available
from transformers import is_bitsandbytes_available, is_comet_available, is_sklearn_available, is_wandb_available

from trl import BaseBinaryJudge, BasePairwiseJudge, is_diffusers_available, is_llm_blender_available
from trl.import_utils import is_mergekit_available
@@ -65,6 +65,13 @@ def require_sklearn(test_case):
return unittest.skipUnless(is_sklearn_available(), "test requires sklearn")(test_case)


def require_comet(test_case):
"""
Decorator marking a test that requires Comet. Skips the test if Comet is not available.
"""
return unittest.skipUnless(is_comet_available(), "test requires comet_ml")(test_case)


class RandomBinaryJudge(BaseBinaryJudge):
"""
Random binary judge, for testing purposes.
10 changes: 9 additions & 1 deletion trl/trainer/bco_config.py
@@ -46,8 +46,10 @@ class BCOConfig(TrainingArguments):
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
This argument is required if you want to use the default data collator.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model and reference model.
generate_during_eval (`bool`, *optional*, defaults to `False`):
If `True`, generates and logs completions from both the model and the reference model to W&B during
If `True`, generates and logs completions from both the model and the reference model to W&B or Comet during
evaluation.
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
@@ -117,6 +119,12 @@ class BCOConfig(TrainingArguments):
"default data collator."
},
)
disable_dropout: bool = field(
default=True,
metadata={
"help": "Whether to disable dropout in the model and reference model."
},
)
generate_during_eval: bool = field(
default=False,
metadata={
