From ccf56ae8448430b9049fbfa868f06b1e6726e218 Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Thu, 12 Dec 2024 19:08:21 -0500
Subject: [PATCH] use init_empty_weights

---
 .../llmcompressor/transformers/kv_cache/test_kv_cache.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
index ce700b84..40f5634b 100644
--- a/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
+++ b/tests/llmcompressor/transformers/kv_cache/test_kv_cache.py
@@ -4,6 +4,7 @@
 import unittest
 
 import torch
+from accelerate import init_empty_weights
 from compressed_tensors.quantization.lifecycle import KVCacheScaleType
 from compressed_tensors.quantization.utils.helpers import iter_named_quantizable_modules
 from datasets import load_dataset
@@ -93,7 +94,8 @@ def test_kv_cache_config_format(self):
 
     def test_kv_cache_model_state_dict_attr(self):
         for output_dir in self.model_args.keys():
-            model = AutoModelForCausalLM.from_pretrained(output_dir)
+            with init_empty_weights():
+                model = AutoModelForCausalLM.from_pretrained(output_dir)
 
             counts = 0
             for name, submodule in iter_named_quantizable_modules(
@@ -203,8 +205,8 @@ def tokenize(sample):
 
         # Check for vllm loading
         self.assertEqual(quant_config["quant_method"], "compressed-tensors")
-
-        model = AutoModelForCausalLM.from_pretrained(output_dir)
+        with init_empty_weights():
+            model = AutoModelForCausalLM.from_pretrained(output_dir)
 
         counts = 0
         for name, submodule in iter_named_quantizable_modules(
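
Note (not part of the patch): accelerate's init_empty_weights is a context
manager that creates module parameters on PyTorch's "meta" device, so no
real weight memory is allocated. That suits these tests, which only walk
the module tree via iter_named_quantizable_modules and check KV-cache scale
attributes, never running a forward pass. Below is a minimal sketch of the
mechanism using the from_config pattern from accelerate's big-model docs;
"facebook/opt-125m" is an arbitrary example checkpoint, not one used by the
test itself, and the patch applies the same context manager around
from_pretrained instead.

    from accelerate import init_empty_weights
    from transformers import AutoConfig, AutoModelForCausalLM

    # Example checkpoint name only; any causal-LM config works here.
    config = AutoConfig.from_pretrained("facebook/opt-125m")

    with init_empty_weights():
        # Modules are constructed normally, but every parameter lands on
        # the "meta" device: shapes and dtypes exist, data does not.
        model = AutoModelForCausalLM.from_config(config)

    print(next(model.parameters()).device)  # meta

Because no tensors are materialized, instantiation is near-instant and uses
almost no RAM, which is why the patch wraps both from_pretrained calls in
the test file.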