Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
horheynm committed Nov 20, 2024
1 parent 21f715d commit 1c54e80
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 71 deletions.

This file was deleted.

34 changes: 0 additions & 34 deletions examples/quantization_2of4_sparse_w4a16/2of4_w4a16_recipe.yaml

This file was deleted.

4 changes: 3 additions & 1 deletion examples/quantization_w8a8_fp8/llama3_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
# Model to quantize. Smaller models that were used for quick local testing:
#   meta-llama/Meta-Llama-3-8B-Instruct, TinyLlama/TinyLlama-1.1B-Chat-v1.0
MODEL_ID = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"

# Load model.
model = AutoModelForCausalLM.from_pretrained(
Expand Down
2 changes: 1 addition & 1 deletion src/llmcompressor/pytorch/model_load/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def load_safetensors_state_dict(file_path: str) -> Dict[str, torch.Tensor]:
return {key: f.get_tensor(key) for key in f.keys()}


def copy_python_files_from_model_cache(model, save_path: str):
def copy_python_files_from_model_cache(model: Module, save_path: str):
config = model.config
cache_path = None
if hasattr(config, "_name_or_path"):
Expand Down

0 comments on commit 1c54e80

Please sign in to comment.