Fix maximum seqlen for gptq quantization (#1748)
fix gptq calibration data
SunMarc authored Mar 18, 2024
1 parent d87efb2 commit 9ff5ea8
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion optimum/gptq/quantizer.py
@@ -354,7 +354,8 @@ def quantize_model(self, model: nn.Module, tokenizer: Optional[Any] = None):
         self.use_cuda_fp16 = model.dtype == torch.float16
 
         if self.model_seqlen is None:
-            self.model_seqlen = get_seqlen(model)
+            # We allow a max value of 4028 to avoid passing data with huge length to the model during the calibration step
+            self.model_seqlen = min(4028, get_seqlen(model))
 
         device = get_device(model)
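For context, a minimal sketch of why the cap matters, assuming a standard tokenizer-based calibration pipeline. The helpers below are illustrative only and are not the repository's exact code; the idea is that the capped model_seqlen bounds the length of every tokenized calibration sample, so nothing longer than 4028 tokens is passed to the model during the calibration step.

    # Illustrative sketch only (not optimum's exact code): how a capped
    # model_seqlen bounds the calibration inputs fed to the model.
    from typing import Any

    MAX_CALIBRATION_SEQLEN = 4028  # same cap introduced in this commit


    def cap_model_seqlen(model_seqlen: int) -> int:
        # Mirror of the change above: never calibrate with sequences longer
        # than MAX_CALIBRATION_SEQLEN, even if the model supports more.
        return min(MAX_CALIBRATION_SEQLEN, model_seqlen)


    def tokenize_calibration_sample(tokenizer: Any, text: str, model_seqlen: int):
        # Hypothetical helper: tokenize one calibration example, truncating to
        # the capped sequence length so GPTQ activation collection stays bounded.
        return tokenizer(
            text,
            truncation=True,
            max_length=model_seqlen,
            return_tensors="pt",
        )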
