# dpo_argument.py
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass, field
from typing import Optional

from paddlenlp.trainer import TrainingArguments


def add_start_docstrings(*docstr):
    """Prepends the given docstrings to the decorated function's docstring."""

    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
        return fn

    return docstring_decorator


@dataclass
@add_start_docstrings(TrainingArguments.__doc__)
class DPOTrainingArguments(TrainingArguments):
    """Training arguments for DPO, extending PaddleNLP's TrainingArguments."""

    unified_checkpoint: bool = field(
        default=True,
        metadata={"help": "Whether to unify hybrid parallel checkpoint."},
    )
    unified_checkpoint_config: Optional[str] = field(
        default="",
        metadata={"help": "Configs to unify hybrid parallel checkpoint."},
    )
    dpo_beta: float = field(default=0.1, metadata={"help": "The beta parameter for the DPO loss."})
    dpo_label_smoothing: float = field(default=0.0, metadata={"help": "Label smoothing ratio for the DPO loss."})
    dpo_loss_type: str = field(default="sigmoid", metadata={"help": "DPO loss type."})


@dataclass
class DPODataArgument:
    """Data arguments for DPO."""

    train_dataset_path: str = field(default="./data/train.jsonl", metadata={"help": "Path to the train dataset file."})
    dev_dataset_path: str = field(default="./data/dev.jsonl", metadata={"help": "Path to the dev dataset file."})
    max_seq_len: int = field(default=4096, metadata={"help": "Maximum sequence length."})
    max_prompt_len: int = field(default=2048, metadata={"help": "Maximum prompt length."})
    autotuner_benchmark: bool = field(
        default=False,
        metadata={"help": "Whether to run the benchmark by autotuner. True for from_scratch."},
    )
    benchmark: bool = field(
        default=False,
        metadata={"help": "Whether to run the benchmark."},
    )
    greedy_intokens: bool = field(
        default=True,
        metadata={"help": "Whether to apply the greedy intokens packing strategy."},
    )
    buffer_size: int = field(default=500, metadata={"help": "Buffer size for the greedy_intokens strategy."})
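

# Note on the data format (illustrative, not taken from this repository): the train
# and dev files are JSON Lines, one preference example per line. The exact keys
# depend on the loader that consumes DPODataArgument; a hypothetical record could
# look like:
#   {"src": ["prompt"], "tgt": [], "response": ["chosen", "rejected"], "sort": [1, 0]}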


@dataclass
class DPOModelArgument:
    """Model arguments for DPO."""

    model_name_or_path: str = field(
        default=None, metadata={"help": "Pretrained model name or path to a local directory."}
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path, if different from model_name_or_path."}
    )
    use_flash_attention: bool = field(default=False, metadata={"help": "Whether to use flash attention."})
    recompute_granularity: str = field(
        default="full",
        metadata={
            "help": "The granularity of recompute training; one of `full`, `full_attn`, or `core_attn`."
        },
    )
    use_attn_mask_start_row_indices: bool = field(
        default=False, metadata={"help": "Whether to use attn_mask_start_row_indices in flash attention."}
    )
    virtual_pp_degree: int = field(
        default=1,
        metadata={"help": "Virtual pipeline parallel degree."},
    )
    sequence_parallel: bool = field(
        default=False,
        metadata={"help": "Whether to use sequence parallelism."},
    )
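

# Usage sketch (an addition for illustration, not part of the original module): the
# three dataclasses above are typically consumed by an entry script through
# PaddleNLP's PdArgumentParser, the Paddle analogue of Hugging Face's
# HfArgumentParser. The CLI flags in the comment below are illustrative only.
if __name__ == "__main__":
    from paddlenlp.trainer import PdArgumentParser

    parser = PdArgumentParser((DPOModelArgument, DPODataArgument, DPOTrainingArguments))
    # Example: python dpo_argument.py --model_name_or_path <model> --output_dir ./checkpoints --dpo_beta 0.2
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()
    print(model_args.model_name_or_path, data_args.max_seq_len, training_args.dpo_beta)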