-
Notifications
You must be signed in to change notification settings - Fork 0
/
rm_training_config.yaml
79 lines (74 loc) · 1.68 KB
/
rm_training_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Reward-model training configuration.
# `defaults_rm` holds the base hyperparameters; the remaining top-level
# sections (`use_system_tag`, `more_epochs`, `rm_deberta_v3`) are named
# overlays whose keys override the defaults when that section is selected.
defaults_rm:
  rng_seed: 0xa1221f97
  is_reward_model: true
  pooling: last
  learning_rate: 1e-5
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  adam_beta1: 0.9
  adam_beta2: 0.95
  adam_epsilon: 1e-12
  weight_decay: 0.00
  warmup_steps: 10
  eval_steps: 50
  save_steps: 100
  save_strategy: steps
  max_length: 512
  num_train_epochs: 2
  logging_steps: 10
  max_grad_norm: 2.0
  save_total_limit: 4
  dtype: fp16
  eval_accumulation_steps: null  # explicit null — presumably falls back to trainer default; confirm against consumer
  freeze_layer: null  # null = no layers frozen — TODO confirm against training code
  cache_dir: .cache
  loss_fn: RMLoss
  score_l2_reg: 0.001
  eval_size: null  # null — presumably uses the full eval split; verify
  log_dir: "base"
  quantization: false
  seq2seqmodel: false
  fuse_gelu: true
  log_wandb: false
  verbose: false
  output_dir: .saved_models_rm
  use_custom_sampler: false
  residual_dropout: 0.0
  use_flash_attention: false
  sort_by_length: false
  per_digit_tokens: false
  datasets_extra: []
  metrics: ["accuracy", "kendalltau"]
  deepspeed_config: configs/zero_config.json
  max_replies: 5

# Overlay: enable system-tag prompt formatting.
use_system_tag:
  use_system_tag: true
  system_property_dropout: 0.5
  system_add_length: true

# Overlay: train one extra epoch over the default.
more_epochs:
  num_train_epochs: 3

# Overlay: DeBERTa-v3-base reward-model run.
rm_deberta_v3:
  is_reward_model: true
  pooling: last
  use_custom_sampler: true
  sort_by_length: false
  model_name: microsoft/deberta-v3-base
  learning_rate: 8e-6
  residual_dropout: 0.01
  residual_dropout_lima: true
  weight_decay: 0.0
  dtype: float32
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 50
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 5
  num_train_epochs: 2
  eval_steps: 10000
  save_steps: 10000
  use_system_tag: false
  system_property_dropout: 0.5
  system_add_length: false