diff --git a/src/llmcompressor/modifiers/quantization/gptq/base.py b/src/llmcompressor/modifiers/quantization/gptq/base.py
index 8c66c272b..0c3c96d34 100644
--- a/src/llmcompressor/modifiers/quantization/gptq/base.py
+++ b/src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -120,7 +120,7 @@ class GPTQModifier(Modifier, HooksMixin):
     disable_quantization_observer_epoch: Optional[float] = None
 
     # private variables
-    _quantization_modifier: Optional[QuantizationModifier] = PrivateAttr()
+    _quantization_modifier: Optional[QuantizationModifier] = PrivateAttr(default=None)
     _hessians: Dict[torch.nn.Module, torch.Tensor] = PrivateAttr(default_factory=dict)
     _num_samples: Dict[torch.nn.Module, int] = PrivateAttr(default_factory=dict)
     _update_size: Optional[int] = PrivateAttr(default=None)
diff --git a/tests/llmcompressor/modifiers/quantization/gptq/utils/test_gptq_wrapper.py b/tests/llmcompressor/modifiers/quantization/gptq/utils/test_gptq_wrapper.py
deleted file mode 100644
index 203d1fe03..000000000
--- a/tests/llmcompressor/modifiers/quantization/gptq/utils/test_gptq_wrapper.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from collections import OrderedDict
-
-import torch
-from compressed_tensors.quantization.lifecycle.apply import apply_quantization_config
-from compressed_tensors.quantization.quant_config import QuantizationConfig
-from compressed_tensors.quantization.quant_scheme import preset_name_to_scheme
-from loguru import logger
-
-from llmcompressor.modifiers.quantization.gptq.utils.gptq_wrapper import GPTQWrapper
-
-
-def test_ignore():
-    model = torch.nn.Sequential(
-        OrderedDict(
-            [
-                ("first_layer", torch.nn.Linear(2, 3)),
-                ("second_layer", torch.nn.Linear(3, 5)),
-            ]
-        )
-    )
-
-    config = QuantizationConfig(
-        config_groups={"group_0": preset_name_to_scheme("W8A8", targets=["Linear"])},
-        ignore=["first_layer"],
-    )
-    apply_quantization_config(model, config)
-
-    messages = []
-    logger.add(lambda m: messages.append(m))
-
-    with torch.no_grad():
-        first_compressor = GPTQWrapper("first_layer", model.first_layer)
-        first_compressor.add_batch(torch.ones(2), None)
-        first_compressor.compress()
-
-        second_compressor = GPTQWrapper("second_layer", model.second_layer)
-        second_compressor.add_batch(torch.ones(3), None)
-        second_compressor.compress()
-
-    assert sum("Skipping unquantized layer first_layer" in m for m in messages) == 1
-    assert sum("Skipping unquantized layer second_layer" in m for m in messages) == 0
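
A minimal sketch (not part of the diff) of why the explicit default matters, assuming pydantic's documented behavior that a PrivateAttr declared without a default is left unset on the instance. The Before/After classes and the Optional[str] annotation are illustrative stand-ins for the real Optional[QuantizationModifier] field.

from typing import Optional

from pydantic import BaseModel, PrivateAttr


class Before(BaseModel):
    # No default: pydantic does not set the private attribute at init time,
    # so reading it before it has been assigned raises AttributeError.
    _quantization_modifier: Optional[str] = PrivateAttr()


class After(BaseModel):
    # Explicit default: the attribute always exists and starts as None.
    _quantization_modifier: Optional[str] = PrivateAttr(default=None)


try:
    Before()._quantization_modifier
except AttributeError:
    print("unset private attribute raises AttributeError")

print(After()._quantization_modifier)  # prints: None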