From 10854711a9744ac9eae110dd27d9bcee1d0edc90 Mon Sep 17 00:00:00 2001
From: Cong
Date: Tue, 6 Jun 2023 22:18:08 +0800
Subject: [PATCH 1/6] feat: support adding special tokens to the tokenizer.

---
 trlx/trainer/__init__.py                | 2 ++
 trlx/trainer/accelerate_base_trainer.py | 6 ++++++
 trlx/trlx.py                            | 5 +++++
 3 files changed, 13 insertions(+)

diff --git a/trlx/trainer/__init__.py b/trlx/trainer/__init__.py
index 8e0d239df..5b8f67ab2 100644
--- a/trlx/trainer/__init__.py
+++ b/trlx/trainer/__init__.py
@@ -41,6 +41,7 @@ def __init__(
         logit_mask=None,
         stop_sequences=None,
         train_mode=False,
+        additional_special_tokens=None,
     ):
         self.store: BaseRolloutStore = None
         self.config = config
@@ -49,6 +50,7 @@ def __init__(
         self.train_mode = train_mode
         self.logit_mask = logit_mask
         self.stop_sequences = stop_sequences
+        self.additional_special_tokens = additional_special_tokens
 
     def push_to_store(self, data):
         self.store.push(data)
diff --git a/trlx/trainer/accelerate_base_trainer.py b/trlx/trainer/accelerate_base_trainer.py
index 62c09fd0c..dc6b04651 100644
--- a/trlx/trainer/accelerate_base_trainer.py
+++ b/trlx/trainer/accelerate_base_trainer.py
@@ -74,6 +74,12 @@ def __init__(self, config, **kwargs):  # noqa: C901
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = "<|padding|>"
 
+        if self.additional_special_tokens is not None and type(self.additional_special_tokens) is list:
+            self.tokenizer.add_special_tokens(
+                {"additional_special_tokens": self.additional_special_tokens}
+            )
+            self.model.base_model.resize_token_embeddings(len(self.tokenizer))
+
         script_name = os.path.basename(sys.argv[0]).rsplit(".", 1)[0]
         if not isinstance(config.model.model_path, str):
             model_name = str(config.model.model_path).split()[0]
diff --git a/trlx/trlx.py b/trlx/trlx.py
index 13ee5daaa..79a2e5217 100644
--- a/trlx/trlx.py
+++ b/trlx/trlx.py
@@ -23,6 +23,7 @@ def train(  # noqa: C901
     metric_fn: Optional[Callable[[List[str], List[str], List[str]], Dict[str, List[float]]]] = None,
     config: Optional[TRLConfig] = None,
     stop_sequences: Optional[List[str]] = [],
+    additional_special_tokens: Optional[List[str]] = None,
 ):
     """
     Dispatches online, offline reinforcement training or supervised finetuning
@@ -54,6 +55,9 @@ def train(  # noqa: C901
         stop_sequences (Optional[List[str]]):
             String sequences to trim generations (both for generating of experience and evaluation) up to its
             encounter in them. Generations will not contain them and also will also be right-stripped
+        additional_special_tokens (Optional[List[str]]):
+            A list of additional special tokens. Add them to the tokenizer to ensure they won't be split by
+            the tokenization process.
     """
     if config is None:
         warnings.warn(
@@ -81,6 +85,7 @@ def train(  # noqa: C901
         reward_fn=reward_fn,
         metric_fn=metric_fn,
         stop_sequences=stop_sequences,
+        additional_special_tokens=additional_special_tokens,
         **config.train.trainer_kwargs,
     )
 

From d806f0abbdee2f4a269e2758a67eb3f0712924a9 Mon Sep 17 00:00:00 2001
From: Cong
Date: Thu, 8 Jun 2023 22:43:37 +0800
Subject: [PATCH 2/6] feat: support adding tokens to the tokenizer.

* Resize the model embeddings by default.
* Tokens registered as special tokens are dropped by PPO's decode phase, which
  has to skip special tokens such as EOS. Therefore only normal tokens are
  added (see the sketch below).
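For context, a minimal standalone sketch (not part of the patch) of the behavior described above, using Hugging Face transformers with an arbitrary checkpoint as a placeholder: tokens registered as additional special tokens are stripped whenever samples are decoded with skip_special_tokens=True, which is what PPO's decode phase does, while tokens added as normal tokens survive.

    from transformers import AutoTokenizer

    # Any checkpoint works here; "gpt2" is only an example.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    # A normal added token survives decoding even when special tokens are skipped.
    tokenizer.add_tokens(["<sep>"])
    ids = tokenizer("hello <sep> world")["input_ids"]
    print(tokenizer.decode(ids, skip_special_tokens=True))  # "<sep>" is kept

    # The same pattern registered as an additional *special* token is stripped.
    tokenizer.add_special_tokens({"additional_special_tokens": ["<meta>"]})
    ids = tokenizer("hello <meta> world")["input_ids"]
    print(tokenizer.decode(ids, skip_special_tokens=True))  # "<meta>" is removed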
---
 trlx/models/modeling_ppo.py             | 12 ++++++++++++
 trlx/trainer/__init__.py                |  4 ++--
 trlx/trainer/accelerate_base_trainer.py | 15 +++++++++------
 trlx/trlx.py                            | 12 ++++++------
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/trlx/models/modeling_ppo.py b/trlx/models/modeling_ppo.py
index 51d54cf36..067f58b72 100644
--- a/trlx/models/modeling_ppo.py
+++ b/trlx/models/modeling_ppo.py
@@ -543,6 +543,18 @@ def __init__(
         for parameter in self.parameters():
             parameter.requires_grad_(False)
 
+    def set_input_embeddings(self, value):
+        self.embed_tokens = value
+
+    def get_input_embeddings(self):
+        return self.embed_tokens
+
+    def get_output_embeddings(self):
+        return self.lm_head
+
+    def set_output_embeddings(self, new_embeddings):
+        self.lm_head = new_embeddings
+
 
 class GPTModelBranch(ModelBranch):
     def forward(  # noqa: max-complexity
diff --git a/trlx/trainer/__init__.py b/trlx/trainer/__init__.py
index 5b8f67ab2..bedf522df 100644
--- a/trlx/trainer/__init__.py
+++ b/trlx/trainer/__init__.py
@@ -41,7 +41,7 @@ def __init__(
         logit_mask=None,
         stop_sequences=None,
         train_mode=False,
-        additional_special_tokens=None,
+        additional_tokens=None,
     ):
         self.store: BaseRolloutStore = None
         self.config = config
@@ -50,7 +50,7 @@ def __init__(
         self.train_mode = train_mode
         self.logit_mask = logit_mask
         self.stop_sequences = stop_sequences
-        self.additional_special_tokens = additional_special_tokens
+        self.additional_tokens = additional_tokens
 
     def push_to_store(self, data):
         self.store.push(data)
diff --git a/trlx/trainer/accelerate_base_trainer.py b/trlx/trainer/accelerate_base_trainer.py
index dc6b04651..18bae35ac 100644
--- a/trlx/trainer/accelerate_base_trainer.py
+++ b/trlx/trainer/accelerate_base_trainer.py
@@ -68,18 +68,21 @@ def __init__(self, config, **kwargs):  # noqa: C901
         self.scheduler = self.setup_scheduler()
 
         self.tokenizer = AutoTokenizer.from_pretrained(config.tokenizer.tokenizer_path)
+        self.tokenizer.add_tokens(self.additional_tokens)
+        # resize the model by default
+        self.model.base_model.resize_token_embeddings(len(self.tokenizer))
+        if hasattr(self.model, "frozen_head"):
+            self.model.frozen_head.resize_token_embeddings(len(self.tokenizer))
+        else:
+            # resize a reference model when hydra heads are not used
+            self.ref_model.resize_token_embeddings(len(self.tokenizer))
+
         self.tokenizer.padding_side = config.tokenizer.padding_side
         self.tokenizer.truncation_side = config.tokenizer.truncation_side
         self.tokenizer.sep_token = ""
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = "<|padding|>"
 
-        if self.additional_special_tokens is not None and type(self.additional_special_tokens) is list:
-            self.tokenizer.add_special_tokens(
-                {"additional_special_tokens": self.additional_special_tokens}
-            )
-            self.model.base_model.resize_token_embeddings(len(self.tokenizer))
-
         script_name = os.path.basename(sys.argv[0]).rsplit(".", 1)[0]
         if not isinstance(config.model.model_path, str):
             model_name = str(config.model.model_path).split()[0]
diff --git a/trlx/trlx.py b/trlx/trlx.py
index 79a2e5217..bf7bf50f9 100644
--- a/trlx/trlx.py
+++ b/trlx/trlx.py
@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import Callable, Dict, Iterable, List, Optional, Tuple
+from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 from trlx.data.configs import TRLConfig
 from trlx.data.default_configs import (
@@ -23,7 +23,7 @@ def train(  # noqa: C901
     metric_fn: Optional[Callable[[List[str], List[str], List[str]], Dict[str, List[float]]]] = None,
     config: Optional[TRLConfig] = None,
     stop_sequences: Optional[List[str]] = [],
-    additional_special_tokens: Optional[List[str]] = None,
+    additional_tokens: Optional[Union[str, List[str]]] = None,
 ):
     """
     Dispatches online, offline reinforcement training or supervised finetuning
@@ -55,9 +55,9 @@ def train(  # noqa: C901
         stop_sequences (Optional[List[str]]):
             String sequences to trim generations (both for generating of experience and evaluation) up to its
             encounter in them. Generations will not contain them and also will also be right-stripped
-        additional_special_tokens (Optional[List[str]]):
-            A list of additional special tokens. Add them to the tokenizer to ensure they won't be split by
-            the tokenization process.
+        additional_tokens (Optional[Union[str, List[str]]]):
+            A list of additional tokens. The given tokens are added only if they don't already exist
+            in the vocabulary; each new token is then assigned a new id
     """
     if config is None:
         warnings.warn(
@@ -85,7 +85,7 @@ def train(  # noqa: C901
         reward_fn=reward_fn,
         metric_fn=metric_fn,
         stop_sequences=stop_sequences,
-        additional_special_tokens=additional_special_tokens,
+        additional_tokens=additional_tokens,
         **config.train.trainer_kwargs,
     )
 

From fc93796be3e3d4e452f29f39f5c5d59ff9237c42 Mon Sep 17 00:00:00 2001
From: Cong
Date: Wed, 14 Jun 2023 11:32:43 +0800
Subject: [PATCH 3/6] fix: move hydra heads resize_token_embeddings

Move the hydra heads' and ref_model's resize_token_embeddings calls to
AcceleratePPOTrainer.
---
 trlx/trainer/accelerate_base_trainer.py | 5 -----
 trlx/trainer/accelerate_ppo_trainer.py  | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/trlx/trainer/accelerate_base_trainer.py b/trlx/trainer/accelerate_base_trainer.py
index 18bae35ac..84aff4c93 100644
--- a/trlx/trainer/accelerate_base_trainer.py
+++ b/trlx/trainer/accelerate_base_trainer.py
@@ -71,11 +71,6 @@ def __init__(self, config, **kwargs):  # noqa: C901
         self.tokenizer.add_tokens(self.additional_tokens)
         # resize the model by default
         self.model.base_model.resize_token_embeddings(len(self.tokenizer))
-        if hasattr(self.model, "frozen_head"):
-            self.model.frozen_head.resize_token_embeddings(len(self.tokenizer))
-        else:
-            # resize a reference model when hydra heads are not used
-            self.ref_model.resize_token_embeddings(len(self.tokenizer))
 
         self.tokenizer.padding_side = config.tokenizer.padding_side
         self.tokenizer.truncation_side = config.tokenizer.truncation_side
diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
index a7fcbb447..9af0b5ca2 100644
--- a/trlx/trainer/accelerate_ppo_trainer.py
+++ b/trlx/trainer/accelerate_ppo_trainer.py
@@ -72,8 +72,12 @@ def __init__(self, config: TRLConfig, **kwargs):
         # Set up a reference model when hydra heads are not used
         if not hasattr(self.model, "frozen_head") and not self.model.peft_type:
             self.ref_model = self.get_arch(self.config)
+            self.ref_model.resize_token_embeddings(len(self.tokenizer))
             self.ref_model.to(self.accelerator.device)
             self.ref_model.eval()
+        else:
+            # resize hydra heads
+            self.model.frozen_head.resize_token_embeddings(len(self.tokenizer))
 
         # Set up the KL controller
         # This helps prevent large divergences in the controller (policy)

From 923ec655941445231ce341ba92ebd851351801b8 Mon Sep 17 00:00:00 2001
From: maxreciprocate <56548574+maxreciprocate@users.noreply.github.com>
Date: Mon, 26 Jun 2023 14:32:09 +0300
Subject: [PATCH 4/6] fix(accelerate_ppo_trainer): resize token embeddings without hydra

---
 trlx/trainer/accelerate_ppo_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
index 9af0b5ca2..7876a8cb0 100644
--- a/trlx/trainer/accelerate_ppo_trainer.py
+++ b/trlx/trainer/accelerate_ppo_trainer.py
@@ -72,7 +72,7 @@ def __init__(self, config: TRLConfig, **kwargs):
         # Set up a reference model when hydra heads are not used
         if not hasattr(self.model, "frozen_head") and not self.model.peft_type:
             self.ref_model = self.get_arch(self.config)
-            self.ref_model.resize_token_embeddings(len(self.tokenizer))
+            self.ref_model.base_model.resize_token_embeddings(len(self.tokenizer))
             self.ref_model.to(self.accelerator.device)
             self.ref_model.eval()
         else:

From e7fc3e3f6a811917352d80652aff16348b830285 Mon Sep 17 00:00:00 2001
From: cOng
Date: Tue, 4 Jul 2023 20:09:26 +0800
Subject: [PATCH 5/6] fix(accelerate_ppo_trainer): no resizing when using peft reference

---
 trlx/trainer/accelerate_ppo_trainer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
index 7876a8cb0..a62e42800 100644
--- a/trlx/trainer/accelerate_ppo_trainer.py
+++ b/trlx/trainer/accelerate_ppo_trainer.py
@@ -71,13 +71,15 @@ def __init__(self, config: TRLConfig, **kwargs):
 
         # Set up a reference model when hydra heads are not used
         if not hasattr(self.model, "frozen_head") and not self.model.peft_type:
+            # Full Reference Copy
             self.ref_model = self.get_arch(self.config)
             self.ref_model.base_model.resize_token_embeddings(len(self.tokenizer))
             self.ref_model.to(self.accelerator.device)
             self.ref_model.eval()
-        else:
-            # resize hydra heads
+        elif hasattr(self.model, "frozen_head"):
+            # Hydra Reference: Use the frozen base layers and head as the reference model, resize hydra heads
            self.model.frozen_head.resize_token_embeddings(len(self.tokenizer))
+        # TODO: else PEFT Reference, do something?
 
         # Set up the KL controller
         # This helps prevent large divergences in the controller (policy)

From 41a01c988776e2b308bf67a10c11be1eb8462eff Mon Sep 17 00:00:00 2001
From: cOng
Date: Thu, 17 Aug 2023 13:21:58 +0800
Subject: [PATCH 6/6] fix(accelerate_ppo_trainer): resize frozen_head when it is not None

---
 trlx/trainer/accelerate_ppo_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
index a62e42800..97cd274e7 100644
--- a/trlx/trainer/accelerate_ppo_trainer.py
+++ b/trlx/trainer/accelerate_ppo_trainer.py
@@ -76,10 +76,10 @@ def __init__(self, config: TRLConfig, **kwargs):
             self.ref_model.base_model.resize_token_embeddings(len(self.tokenizer))
             self.ref_model.to(self.accelerator.device)
             self.ref_model.eval()
-        elif hasattr(self.model, "frozen_head"):
+        elif hasattr(self.model, "frozen_head") and self.model.frozen_head is not None:
             # Hydra Reference: Use the frozen base layers and head as the reference model, resize hydra heads
             self.model.frozen_head.resize_token_embeddings(len(self.tokenizer))
-        # TODO: else PEFT Reference, do something?
+        # else PEFT Reference
 
         # Set up the KL controller
         # This helps prevent large divergences in the controller (policy)
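For reference, a hypothetical end-to-end sketch of the parameter this series introduces (not part of the patch). The checkpoint, prompts, and reward function below are placeholders, and omitting config falls back to a default config with a warning:

    import trlx

    # Toy reward: longer samples score higher (stand-in for a real reward model).
    def reward_fn(samples, **kwargs):
        return [float(len(sample)) for sample in samples]

    trainer = trlx.train(
        "gpt2",  # any causal LM checkpoint
        reward_fn=reward_fn,
        prompts=["<usr> hello", "<usr> how are you?"],
        additional_tokens=["<usr>", "<bot>"],  # added as normal tokens; embeddings are resized
    )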