Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
horheynm committed Nov 20, 2024
1 parent 21f715d commit 1c54e80
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 71 deletions.

This file was deleted.

34 changes: 0 additions & 34 deletions examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml

This file was deleted.

4 changes: 3 additions & 1 deletion examples/quantization_w8a8_fp8/llama3_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# Model to quantize. Smaller models that were used for quick local testing:
#   meta-llama/Meta-Llama-3-8B-Instruct, TinyLlama/TinyLlama-1.1B-Chat-v1.0
MODEL_ID = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"

# Load model.
model = AutoModelForCausalLM.from_pretrained(
Expand Down
2 changes: 1 addition & 1 deletion src/llmcompressor/pytorch/model_load/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def load_safetensors_state_dict(file_path: str) -> Dict[str, torch.Tensor]:
return {key: f.get_tensor(key) for key in f.keys()}


def copy_python_files_from_model_cache(model, save_path: str):
def copy_python_files_from_model_cache(model: Module, save_path: str):
config = model.config
cache_path = None
if hasattr(config, "_name_or_path"):
Expand Down

0 comments on commit 1c54e80

Please sign in to comment.