Update README: add PEFT fine-tuning examples

llm-jp · Oct 21, 2023 · e0d9e09
1 parent acb851d
commit e0d9e09
Showing 1 changed file with 41 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -97,3 +97,44 @@ accelerate launch --config_file accelerate_config_zero3.yaml \
     --model_name_or_path llm-jp/llm-jp-13b-v1.0 \
     --output_dir results/llm-jp-13b-v1.0_jaster-dolly-oasst
 ```
+
+### Fine-tuning with PEFT
+
+#### For the 1.3B model
+
+```bash
+accelerate launch --config_file accelerate_config_zero3.yaml \
+    train.py \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 32 \
+    --learning_rate 1e-5 \
+    --warmup_ratio 0.1 \
+    --lr_scheduler cosine \
+    --bf16 \
+    --max_seq_length 2048 \
+    --data_files jamp.json janli.json jcommonsenseqa.json jemhopqa.json jnli.json jsem.json jsick.json jsquad.json jsts.json niilc.json dolly_deepl.json oasst_deepl.json \
+    --use_peft \
+    --model_name_or_path llm-jp/llm-jp-1.3b-v1.0 \
+    --output_dir results/llm-jp-1.3b-v1.0_jaster-dolly-oasst-peft
+```
+
+#### For the 13B model
+
+```bash
+accelerate launch --config_file accelerate_config_zero3.yaml \
+    train.py \
+    --num_train_epochs 2 \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 32 \
+    --learning_rate 1e-5 \
+    --warmup_ratio 0.1 \
+    --lr_scheduler cosine \
+    --bf16 \
+    --max_seq_length 2048 \
+    --gradient_checkpointing \
+    --data_files jamp.json janli.json jcommonsenseqa.json jemhopqa.json jnli.json jsem.json jsick.json jsquad.json jsts.json niilc.json dolly_deepl.json oasst_deepl.json \
+    --use_peft \
+    --model_name_or_path llm-jp/llm-jp-13b-v1.0 \
+    --output_dir results/llm-jp-13b-v1.0_jaster-dolly-oasst-peft
+```