From ccf56ae8448430b9049fbfa868f06b1e6726e218 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Thu, 12 Dec 2024 19:08:21 -0500
Subject: [PATCH] use init_empty_weights

---
 .../llmcompressor/transformers/kv_cache/test_kv_cache.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
index ce700b84..40f5634b 100644
--- a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
+++ b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
@@ -4,6 +4,7 @@
 import unittest
 
 import torch
+from accelerate import init_empty_weights
 from compressed_tensors.quantization.lifecycle import KVCacheScaleType
 from compressed_tensors.quantization.utils.helpers import iter_named_quantizable_modules
 from datasets import load_dataset
@@ -93,7 +94,8 @@ def test_kv_cache_config_format(self):
 
     def test_kv_cache_model_state_dict_attr(self):
         for output_dir in self.model_args.keys():
-            model = AutoModelForCausalLM.from_pretrained(output_dir)
+            with init_empty_weights():
+                model = AutoModelForCausalLM.from_pretrained(output_dir)
 
             counts = 0
             for name, submodule in iter_named_quantizable_modules(
@@ -203,8 +205,8 @@ def tokenize(sample):
 
         # Check for vllm loading
         self.assertEqual(quant_config["quant_method"], "compressed-tensors")
-
-        model = AutoModelForCausalLM.from_pretrained(output_dir)
+        with init_empty_weights():
+            model = AutoModelForCausalLM.from_pretrained(output_dir)
 
         counts = 0
         for name, submodule in iter_named_quantizable_modules(
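
Note (not part of the patch): accelerate's init_empty_weights is a context
manager that creates module parameters on PyTorch's "meta" device, so no
real weight memory is allocated. That suits these tests, which only walk
the module tree via iter_named_quantizable_modules and check KV-cache scale
attributes, never running a forward pass. Below is a minimal sketch of the
mechanism using the from_config pattern from accelerate's big-model docs;
"facebook/opt-125m" is an arbitrary example checkpoint, not one used by the
test itself, and the patch applies the same context manager around
from_pretrained instead.

    from accelerate import init_empty_weights
    from transformers import AutoConfig, AutoModelForCausalLM

    # Example checkpoint name only; any causal-LM config works here.
    config = AutoConfig.from_pretrained("facebook/opt-125m")

    with init_empty_weights():
        # Modules are constructed normally, but every parameter lands on
        # the "meta" device: shapes and dtypes exist, data does not.
        model = AutoModelForCausalLM.from_config(config)

    print(next(model.parameters()).device)  # meta

Because no tensors are materialized, instantiation is near-instant and uses
almost no RAM, which is why the patch wraps both from_pretrained calls in
the test file.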