diff --git a/requirements.txt b/requirements.txt
index 37ee1e42cf..46d0691b6c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ peft==0.13.2
 transformers==4.45.2
 tokenizers>=0.20.1
 bitsandbytes==0.44.1
-accelerate==1.0.0
+accelerate==0.34.2
 datasets==3.0.1
 deepspeed==0.14.4
 pydantic==2.6.3
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index 61bb8ed327..957a6a9e36 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -19,6 +19,8 @@
 LOG = logging.getLogger("axolotl.tests.e2e.multigpu")
 os.environ["WANDB_DISABLED"] = "true"
 
+AXOLOTL_ROOT = Path(__file__).parent.parent.parent.parent
+
 
 @pytest.fixture(scope="session", autouse=True)
 def download_model():
@@ -346,3 +348,115 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
                 str(Path(temp_dir) / "config.yaml"),
             ]
         )
+
+    @with_temp_dir
+    def test_ds_zero3_packed(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TinyLlama/TinyLlama_v1.1",
+                "tokenizer_type": "LlamaTokenizer",
+                "sample_packing": True,
+                "eval_sample_packing": False,
+                "pad_to_sequence_len": True,
+                "sequence_len": 2048,
+                "val_set_size": 0.05,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "tatsu-lab/alpaca",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "max_steps": 100,
+                "micro_batch_size": 4,
+                "gradient_accumulation_steps": 4,
+                "output_dir": temp_dir,
+                "learning_rate": 0.00001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+                "flash_attention": True,
+                "deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero3_bf16.json"),
+            }
+        )
+
+        # write cfg to yaml file
+        Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
+            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
+
+        execute_subprocess_async(
+            [
+                "accelerate",
+                "launch",
+                "--num-processes",
+                "2",
+                "-m",
+                "axolotl.cli.train",
+                str(Path(temp_dir) / "config.yaml"),
+            ]
+        )
+
+    @with_temp_dir
+    def test_ds_zero3_qlora_packed(self, temp_dir):
+        # pylint: disable=duplicate-code
+        cfg = DictDefault(
+            {
+                "base_model": "TinyLlama/TinyLlama_v1.1",
+                "tokenizer_type": "LlamaTokenizer",
+                "load_in_4bit": True,
+                "adapter": "qlora",
+                "lora_r": 8,
+                "lora_alpha": 16,
+                "lora_dropout": 0.05,
+                "lora_target_linear": True,
+                "sample_packing": True,
+                "eval_sample_packing": False,
+                "pad_to_sequence_len": True,
+                "sequence_len": 2048,
+                "val_set_size": 0.05,
+                "special_tokens": {
+                    "unk_token": "<unk>",
+                    "bos_token": "<s>",
+                    "eos_token": "</s>",
+                },
+                "datasets": [
+                    {
+                        "path": "tatsu-lab/alpaca",
+                        "type": "alpaca",
+                    },
+                ],
+                "num_epochs": 1,
+                "max_steps": 100,
+                "micro_batch_size": 4,
+                "gradient_accumulation_steps": 4,
+                "output_dir": temp_dir,
+                "learning_rate": 0.0001,
+                "optimizer": "adamw_torch",
+                "lr_scheduler": "cosine",
+                "flash_attention": True,
+                "deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero3_bf16.json"),
+            }
+        )
+
+        # write cfg to yaml file
+        Path(temp_dir).mkdir(parents=True, exist_ok=True)
+        with open(Path(temp_dir) / "config.yaml", "w", encoding="utf-8") as fout:
+            fout.write(yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper))
+
+        execute_subprocess_async(
+            [
+                "accelerate",
+                "launch",
+                "--num-processes",
+                "2",
+                "-m",
+                "axolotl.cli.train",
+                str(Path(temp_dir) / "config.yaml"),
+            ]
+        )
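Note: both new tests resolve "deepspeed": str(AXOLOTL_ROOT / "deepspeed_configs/zero3_bf16.json") and so depend on that file existing at the repo root. The diff does not include its contents; a minimal sketch of what a ZeRO stage-3 + bf16 DeepSpeed config of this kind typically looks like (not necessarily the repo's exact file) is:

{
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": true,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "bf16": {
    "enabled": true
  },
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto"
}

The "auto" values defer batch-size and clipping settings to the HF Trainer arguments that axolotl derives from the YAML config, so the same JSON works for both the full-finetune and QLoRA tests. On a 2-GPU machine the new tests can be selected with, e.g., pytest tests/e2e/multigpu/test_llama.py -k ds_zero3.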