Merge pull request #169 from AIRobotZhang/main

Add IOPO
AlibabaResearch · Nov 25, 2024 · b22b277 · b22b277
2 parents dac5bd0 + 59c0543
commit b22b277
Show file tree

Hide file tree

Showing 140 changed files with 20,844 additions and 0 deletions.
diff --git a/IOPO/Benchmark-TRACE/2024_trace_evaluation.jsonl b/IOPO/Benchmark-TRACE/2024_trace_evaluation.jsonl
diff --git a/IOPO/Method-IOPO/.DS_Store b/IOPO/Method-IOPO/.DS_Store
diff --git a/IOPO/Method-IOPO/Makefile b/IOPO/Method-IOPO/Makefile
@@ -0,0 +1,14 @@
+.PHONY: quality style test  # command targets, not files on disk
+
+check_dirs := scripts src tests
+
+quality:  ## Run `ruff check` and `ruff format --check` over the directories in check_dirs
+	ruff check $(check_dirs)
+	ruff format --check $(check_dirs)
+
+style:  ## Run `ruff check --fix` and then `ruff format` over the directories in check_dirs
+	ruff check $(check_dirs) --fix
+	ruff format $(check_dirs)
+
+test:  ## Run pytest on tests/ with CUDA_VISIBLE_DEVICES set to the empty string (presumably to hide GPUs)
+	CUDA_VISIBLE_DEVICES= pytest tests/
diff --git a/IOPO/Method-IOPO/data/dataset_info.json b/IOPO/Method-IOPO/data/dataset_info.json
@@ -0,0 +1,12 @@
+{
+  "trace_iopo": {
+    "file_name": "trace.json",
+    "ranking": true,
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "chosen": "chosen",
+      "rejected": "rejected"
+    }
+  }
+}
diff --git a/IOPO/Method-IOPO/data/trace.json b/IOPO/Method-IOPO/data/trace.json
@@ -0,0 +1,22 @@
+[
+    {
+      "conversations": [
+          {
+              "from": "human",
+              "value": "为一篇关于高中生“备考”的论文提供一个更引人入胜、富有启发性的标题\n1. 标题长度控制在10至15个字之间；\n2. 标题中包含“高考”或“大学入学”等关键词；\n3. 采用激励鼓舞的语气风格，激发读者兴趣和共鸣；\n"
+          },
+          {
+              "from": "human",
+              "value": "为一篇关于高中生“备考”的论文提供一个更引人入胜、富有启发性的标题\n1. 标题长度控制在16至20个字之间；\n2. 标题中必须同时包含“高考”和“大学入学”两个关键词；\n3. 采用平实叙述的语气风格，注重信息传达。\n"
+          }
+      ],
+      "chosen": {
+          "from": "gpt",
+          "value": "\"高考前夜：点燃梦想，跨越星辰\"\n\n这个标题符合所有的约束条件，长度为10个字，包含了关键词“高考”，并且采用了激励鼓舞的语气风格，旨在激发读者的兴趣和共鸣，鼓励他们为高考全力以赴，勇敢追梦。"
+      },
+      "rejected": {
+          "from": "gpt",
+          "value": "\"高考征程：智慧备考，梦想大学入学的钥匙\" \n\n这个标题简洁明了，并且包含了所有的要求。它指出高考是通向大学入学的必经之路，而如何智慧备考，则是开启梦想大学之门的钥匙。这将启发读者对如何有效备考进行深入思考，并激发他们为实现大学梦想而努力奋斗的热情。"
+      }
+   }
+]
diff --git a/IOPO/Method-IOPO/examples/.DS_Store b/IOPO/Method-IOPO/examples/.DS_Store
diff --git a/IOPO/Method-IOPO/examples/deepspeed/ds_z0_config.json b/IOPO/Method-IOPO/examples/deepspeed/ds_z0_config.json
@@ -0,0 +1,28 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 0,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
diff --git a/IOPO/Method-IOPO/examples/deepspeed/ds_z2_config.json b/IOPO/Method-IOPO/examples/deepspeed/ds_z2_config.json
@@ -0,0 +1,28 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 2,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
diff --git a/IOPO/Method-IOPO/examples/deepspeed/ds_z2_offload_config.json b/IOPO/Method-IOPO/examples/deepspeed/ds_z2_offload_config.json
@@ -0,0 +1,32 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 2,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
diff --git a/IOPO/Method-IOPO/examples/deepspeed/ds_z3_config.json b/IOPO/Method-IOPO/examples/deepspeed/ds_z3_config.json
@@ -0,0 +1,30 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 3,
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 1e9,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 1e9,
+    "stage3_max_reuse_distance": 1e9,
+    "stage3_gather_16bit_weights_on_model_save": true
+  }
+}
diff --git a/IOPO/Method-IOPO/examples/deepspeed/ds_z3_offload_config.json b/IOPO/Method-IOPO/examples/deepspeed/ds_z3_offload_config.json
@@ -0,0 +1,38 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 3,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "offload_param": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 1e9,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 1e9,
+    "stage3_max_reuse_distance": 1e9,
+    "stage3_gather_16bit_weights_on_model_save": true
+  }
+}
diff --git a/IOPO/Method-IOPO/examples/qwen2_lora_iopo.yaml b/IOPO/Method-IOPO/examples/qwen2_lora_iopo.yaml
@@ -0,0 +1,44 @@
+### model
+model_name_or_path: models/qwen2_lora_sft-trace-0913-cpt7500  # presumably a local SFT checkpoint directory; point this at your own model
+### method
+stage: iopo
+do_train: true
+finetuning_type: lora
+lora_target: all
+pref_beta: 0.1
+pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
+simpo_gamma: 1.0  # NOTE(review): presumably only consulted when pref_loss is simpo; confirm
+deepspeed: examples/deepspeed/ds_z3_offload_config.json  # relative path; presumably resolved from the launch directory
+
+### dataset
+dataset: trace_iopo  # same name as the `trace_iopo` entry in data/dataset_info.json
+template: qwen
+cutoff_len: 6000
+max_samples: 120000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/qwen2-7b/lora/trace_iopo_1015
+logging_steps: 100
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8  # with batch size 1 this is presumably 8 samples per device per optimizer step
+learning_rate: 5.0e-6
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.1  # presumably the fraction of the dataset held out for evaluation; confirm
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 500  # same interval as save_steps above
+
+report_to: none
diff --git a/IOPO/Method-IOPO/requirements.txt b/IOPO/Method-IOPO/requirements.txt
@@ -0,0 +1,21 @@
+transformers>=4.41.2,<=4.43.4
+datasets>=2.16.0,<=2.20.0
+accelerate>=0.30.1,<=0.32.0
+peft>=0.11.1,<=0.12.0
+trl>=0.8.6,<=0.9.6
+gradio>=4.0.0
+pandas>=2.0.0
+scipy
+einops
+sentencepiece
+tiktoken
+protobuf
+uvicorn
+pydantic
+fastapi
+sse-starlette
+matplotlib>=3.7.0
+fire
+packaging
+pyyaml
+numpy<2.0.0
diff --git a/IOPO/Method-IOPO/setup.py b/IOPO/Method-IOPO/setup.py
@@ -0,0 +1,93 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+
+from setuptools import find_packages, setup
+
+
+def get_version():
+    """Return the version string declared in ``src/llamafactory/extras/env.py``.
+
+    The file is read relative to the current working directory and scanned for a
+    ``VERSION = "..."`` style assignment; the quoted value is returned. Exactly
+    one match is expected: the single-element unpacking raises ``ValueError``
+    when the pattern matches zero times or more than once.
+    """
+    env_path = os.path.join("src", "llamafactory", "extras", "env.py")
+    with open(env_path, "r", encoding="utf-8") as env_file:
+        source_text = env_file.read()
+
+    version_regex = r"{}\W*=\W*\"([^\"]+)\"".format("VERSION")
+    matches = re.findall(version_regex, source_text)
+    # Unpacking into a one-element tuple doubles as a uniqueness check.
+    (version,) = matches
+    return version
+
+
+def get_requires():
+    """Return the requirement specifiers listed in ``requirements.txt``.
+
+    The file is read relative to the current working directory. Each line is
+    stripped of surrounding whitespace; blank lines and comment lines (including
+    indented ones) are skipped so that only real requirement strings are
+    returned, in file order.
+    """
+    with open("requirements.txt", "r", encoding="utf-8") as f:
+        # Strip before filtering: the comment check must see the first
+        # non-blank character, and empty entries must not reach install_requires.
+        stripped_lines = (line.strip() for line in f)
+        return [line for line in stripped_lines if line and not line.startswith("#")]
+
+
+extra_require = {  # optional dependency groups; passed to setup() as extras_require in main()
+    "torch": ["torch>=1.13.1"],
+    "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"],  # exact pins, unlike the other groups
+    "metrics": ["nltk", "jieba", "rouge-chinese"],
+    "deepspeed": ["deepspeed>=0.10.0"],
+    "bitsandbytes": ["bitsandbytes>=0.39.0"],
+    "hqq": ["hqq"],
+    "eetq": ["eetq"],
+    "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
+    "awq": ["autoawq"],
+    "aqlm": ["aqlm[gpu]>=1.1.0"],
+    "vllm": ["vllm>=0.4.3"],
+    "galore": ["galore-torch"],
+    "badam": ["badam>=1.2.1"],
+    "adam-mini": ["adam-mini"],
+    "qwen": ["transformers_stream_generator"],
+    "modelscope": ["modelscope"],
+    "dev": ["ruff", "pytest"],  # the tools the Makefile's quality/style/test targets invoke
+}
+
+
+def main():
+    """Configure and run the setuptools build for the ``llamafactory`` package.
+
+    The version comes from ``get_version()``, the install requirements from
+    ``get_requires()``, the optional groups from ``extra_require`` and the long
+    description from ``README.md``. All file paths are relative to the current
+    working directory.
+    """
+    # Read README.md inside a context manager so the handle is closed
+    # deterministically instead of being left open until garbage collection.
+    with open("README.md", "r", encoding="utf-8") as readme_file:
+        long_description = readme_file.read()
+
+    setup(
+        name="llamafactory",
+        version=get_version(),
+        author="hiyouga",
+        # Adjacent string literals are concatenated into one address at compile time.
+        author_email="hiyouga" "@" "buaa.edu.cn",
+        description="Easy-to-use LLM fine-tuning framework",
+        long_description=long_description,
+        long_description_content_type="text/markdown",
+        keywords=["LLaMA", "BLOOM", "Falcon", "LLM", "ChatGPT", "transformer", "pytorch", "deep learning"],
+        license="Apache 2.0 License",
+        url="https://github.com/hiyouga/LLaMA-Factory",
+        # Packages live under src/, so map the root package directory there.
+        package_dir={"": "src"},
+        packages=find_packages("src"),
+        python_requires=">=3.8.0",
+        install_requires=get_requires(),
+        extras_require=extra_require,
+        entry_points={"console_scripts": ["llamafactory-cli = llamafactory.cli:main"]},
+        classifiers=[
+            "Development Status :: 4 - Beta",
+            "Intended Audience :: Developers",
+            "Intended Audience :: Education",
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: Apache Software License",
+            "Operating System :: OS Independent",
+            "Programming Language :: Python :: 3",
+            "Programming Language :: Python :: 3.8",
+            "Programming Language :: Python :: 3.9",
+            "Programming Language :: Python :: 3.10",
+            "Programming Language :: Python :: 3.11",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        ],
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/IOPO/Method-IOPO/src/.DS_Store b/IOPO/Method-IOPO/src/.DS_Store