diff --git a/src/transformers/quantizers/quantizer_compressed_tensors.py b/src/transformers/quantizers/quantizer_compressed_tensors.py index 5064f2c019d74e..7d208087bbbfec 100644 --- a/src/transformers/quantizers/quantizer_compressed_tensors.py +++ b/src/transformers/quantizers/quantizer_compressed_tensors.py @@ -37,6 +37,13 @@ class CompressedTensorsHfQuantizer(HfQuantizer): def __init__(self, quantization_config: CompressedTensorsConfig, **kwargs): super().__init__(quantization_config, **kwargs) + + if not is_compressed_tensors_available(): + raise ImportError( + "Using `compressed_tensors` quantized models requires the compressed-tensors library: " + "`pip install compressed-tensors`" + ) + from compressed_tensors.compressors import ModelCompressor self.compressor = ModelCompressor.from_compression_config(quantization_config) diff --git a/src/transformers/quantizers/quantizer_gptq.py b/src/transformers/quantizers/quantizer_gptq.py index bf5079435d63b2..d47a2ba79cb60d 100644 --- a/src/transformers/quantizers/quantizer_gptq.py +++ b/src/transformers/quantizers/quantizer_gptq.py @@ -44,18 +44,25 @@ class GptqHfQuantizer(HfQuantizer): def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs): super().__init__(quantization_config, **kwargs) + + if not is_optimum_available(): + raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)") from optimum.gptq import GPTQQuantizer self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum()) def validate_environment(self, *args, **kwargs): + if not is_optimum_available(): + raise ImportError("Loading a GPTQ quantized model requires optimum (`pip install optimum`)") + + if not is_auto_gptq_available(): + raise ImportError( + "Loading a GPTQ quantized model requires the auto-gptq library (`pip install auto-gptq`)" + ) + gptq_supports_cpu = version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2") if not gptq_supports_cpu and not torch.cuda.is_available(): raise RuntimeError("GPU is required to quantize or run quantize model.") - elif not (is_optimum_available() and is_auto_gptq_available()): - raise ImportError( - "Loading a GPTQ quantized model requires optimum (`pip install optimum`) and auto-gptq library (`pip install auto-gptq`)" - ) elif version.parse(importlib.metadata.version("auto_gptq")) < version.parse("0.4.2"): raise ImportError( "You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq`"