From d8c1db2f568d4bcc254bc046036acf0d6bba8373 Mon Sep 17 00:00:00 2001
From: jiqing-feng
Date: Tue, 24 Dec 2024 19:36:00 +0800
Subject: [PATCH] enable non-cuda awq model support without modify version (#35334)

Signed-off-by: jiqing-feng
---
 src/transformers/quantizers/quantizer_awq.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/transformers/quantizers/quantizer_awq.py b/src/transformers/quantizers/quantizer_awq.py
index 4dd818f6465df9..d7a756b23a07e7 100644
--- a/src/transformers/quantizers/quantizer_awq.py
+++ b/src/transformers/quantizers/quantizer_awq.py
@@ -52,6 +52,10 @@ def validate_environment(self, device_map, **kwargs):
         if not is_accelerate_available():
             raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")
 
+        if self.quantization_config.version == AWQLinearVersion.GEMM and not torch.cuda.is_available():
+            logger.warning_once("No CUDA found, replace GEMM with IPEX version to support non-cuda AWQ model.")
+            self.quantization_config.version = AWQLinearVersion.IPEX
+
         if self.quantization_config.version == AWQLinearVersion.IPEX:
             if version.parse(importlib.metadata.version("autoawq")) < version.parse("0.2.6"):
                 raise RuntimeError(
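
For context, a rough usage sketch of what this fallback enables on a host without CUDA. It assumes autoawq >= 0.2.6 and intel-extension-for-pytorch are installed; the checkpoint name and prompt are illustrative examples, not part of the patch:

    # Minimal sketch: loading a GEMM-quantized AWQ checkpoint on a CUDA-less machine.
    # With this patch, validate_environment() logs the warning above and silently
    # switches the quantization config to the IPEX version instead of failing.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"  # example AWQ (GEMM) checkpoint

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
    # Expected log: "No CUDA found, replace GEMM with IPEX version to support non-cuda AWQ model."

    inputs = tokenizer("Hello, my name is", return_tensors="pt")
    output = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.decode(output[0], skip_special_tokens=True))

The design choice here is to rewrite the config in place rather than raise: users can keep shipping checkpoints tagged with the GEMM version and still load them on CPU/XPU, at the cost of a one-time warning.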