
actually make the test useful (#920)
Co-authored-by: Kyle Sayers <[email protected]>
Signed-off-by: Kyle Sayers <[email protected]>
dsikka and kylesayrs committed Nov 21, 2024
1 parent cdb6231 commit 50e881f
Showing 5 changed files with 29 additions and 12 deletions.
@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
 model_stub: "nm-testing/tinyllama-fp8-dynamic-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
 model_stub: "nm-testing/tinyllama-w4a16-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
 model_stub: "nm-testing/tinyllama-w8a16-dense"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

@@ -1,3 +1,4 @@
 cadence: "commit"
 test_type: "regression"
 model_stub: "nm-testing/tinyllama-w8a8-compressed"
+empty_model: "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

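All four configs gain an empty_model stub pointing at the uncompressed TinyLlama base checkpoint, alongside the existing compressed model_stub. For orientation, a minimal sketch of how such configs could feed @parameterized_class in the test below, assuming parse_params simply loads every YAML file in the config directory into one dict per test case; the real helper lives in tests/testing_utils.py and may differ:

# Hypothetical stand-in for tests.testing_utils.parse_params: read every
# YAML config in a directory into a list of dicts, one per test case.
import os

import yaml  # assumption: PyYAML is available in the test environment


def parse_params_sketch(config_dir: str) -> list[dict]:
    params = []
    for name in sorted(os.listdir(config_dir)):
        if name.endswith((".yaml", ".yml")):
            with open(os.path.join(config_dir, name)) as f:
                params.append(yaml.safe_load(f))
    return params


# Each dict then becomes class attributes via @parameterized_class, e.g.
# {"cadence": "commit", "test_type": "regression",
#  "model_stub": "nm-testing/tinyllama-w4a16-compressed",
#  "empty_model": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"}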
@@ -3,8 +3,11 @@
 import unittest
 
 import torch
+from compressed_tensors import QUANTIZATION_CONFIG_NAME
+from compressed_tensors.compressors import ModelCompressor
+from compressed_tensors.quantization import QuantizationStatus
 from parameterized import parameterized_class
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 from tests.testing_utils import parse_params, requires_gpu, requires_torch
 
@@ -16,6 +19,7 @@
 @parameterized_class(parse_params(CONFIG_DIR))
 class TestQuantizationMatches(unittest.TestCase):
     model_stub = None
+    empty_model = None
 
     @classmethod
     def setUpClass(cls):
@@ -29,25 +33,34 @@ def setUpClass(cls):
             device_map="auto",
             # run_compressed=True, # TODO: Give option on HFQuantizer
         )
+        # TODO: Use ModelCompressor until decompression is supported through
+        # HFQuant/run_compressed can be turned off.
         cls.uncompressed_model = AutoModelForCausalLM.from_pretrained(
-            cls.model_stub,
-            torch_dtype="auto",
-            device_map="auto",
-            # run_compressed=False, # TODO: Give option on HFQuantizer
+            cls.empty_model,
+            torch_dtype=cls.compressed_model.dtype,
+            device_map=cls.compressed_model.device,
         )
+        config = AutoConfig.from_pretrained(cls.model_stub)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+        cls.compressor = ModelCompressor.from_compression_config(compression_config)
+        cls.compressor.quantization_config.quantization_status = (
+            QuantizationStatus.FROZEN
+        )
+        cls.compressor.decompress(
+            model_path=cls.model_stub, model=cls.uncompressed_model
+        )
 
         cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_stub)
-        cls.device = cls.compressed_model.device
 
     def test_compressed_matches_uncompressed(self):
         SAMPLE_INPUT = [
             "I love 4-bit quantization because",
-            "What is the capital of Paris?",
+            "What is the capital of France?",
             "def fibonacci(n):",
         ]
 
         inputs = self.tokenizer(SAMPLE_INPUT, return_tensors="pt", padding=True).to(
-            self.device
+            self.compressed_model.device
         )
         compressed_output = self.tokenizer.batch_decode(
             self.compressed_model.generate(**inputs, max_length=50)
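With these changes, setUpClass stops loading the compressed stub twice and instead decompresses it into a freshly instantiated base model, so the test genuinely compares compressed inference against decompressed weights. A condensed sketch of that flow outside the unittest scaffolding, using only the API calls visible in the diff above; the model stub chosen here is one of the four configs, and any of them works the same way:

# Condensed sketch of the decompression flow introduced by this commit.
# All compressed_tensors / transformers calls mirror the diff above.
from compressed_tensors import QUANTIZATION_CONFIG_NAME
from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import QuantizationStatus
from transformers import AutoConfig, AutoModelForCausalLM

model_stub = "nm-testing/tinyllama-w4a16-compressed"  # any of the four stubs
empty_model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# 1. Load the compressed checkpoint as-is.
compressed_model = AutoModelForCausalLM.from_pretrained(
    model_stub, torch_dtype="auto", device_map="auto"
)

# 2. Instantiate the uncompressed base architecture with matching dtype/device.
uncompressed_model = AutoModelForCausalLM.from_pretrained(
    empty_model,
    torch_dtype=compressed_model.dtype,
    device_map=compressed_model.device,
)

# 3. Rebuild the compressor from the checkpoint's quantization config and
#    decompress the quantized weights into the base model.
config = AutoConfig.from_pretrained(model_stub)
compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
compressor = ModelCompressor.from_compression_config(compression_config)
compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN
compressor.decompress(model_path=model_stub, model=uncompressed_model)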
