Commit

better inference
kylesayrs committed Dec 14, 2024
1 parent f24a2af commit 691bac4
Showing 1 changed file with 6 additions and 8 deletions.
src/llmcompressor/modifiers/quantization/gptq/base.py
@@ -218,15 +218,13 @@ def on_initialize(self, state: "State", **kwargs) -> bool:
             )
             return True
 
-        # failure to trace
-        except torch.fx.proxy.TraceError:
-            model_name = state.model.__class__.__name__
-            column_names = state.data.calib.dataset.column_names
-            warnings.warn(
-                f"Failed to trace {model_name} with dataset {column_names}. "
-                "Falling back to layer_sequential pipeline"
-            )
+        except Exception as exception:
+            if isinstance(exception, torch.fx.proxy.TraceError):
+                model_name = state.model.__class__.__name__
+                column_names = state.data.calib.dataset.column_names
+                warnings.warn(f"Failed to trace {model_name} with {column_names}")
 
+            warnings.warn("Falling back to layer_sequential pipeline")
             try:
                 run_layer_sequential(
                     state.model,
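For context, here is a minimal, self-contained sketch of the control flow this commit introduces. The `run_sequential` and `run_layer_sequential` stubs below are hypothetical simplifications (the real functions take more arguments), and `initialize` is an invented name; only the try/except structure mirrors the diff. Before this change, only a torch.fx TraceError reached the fallback path; after it, any exception triggers the fallback, with an extra warning when tracing was the cause.

import warnings

import torch


def run_sequential(model, dataloader):
    # Hypothetical stub: the real pipeline traces the model with torch.fx,
    # which can raise torch.fx.proxy.TraceError for untraceable models.
    raise torch.fx.proxy.TraceError("cannot trace model")


def run_layer_sequential(model, dataloader):
    # Hypothetical stub for the simpler layer-by-layer fallback pipeline.
    print("running layer_sequential pipeline")


def initialize(model, dataloader) -> bool:
    try:
        run_sequential(model, dataloader)
        return True
    except Exception as exception:
        # Trace failures additionally get a model-specific warning, but
        # every exception now falls through to the fallback pipeline.
        if isinstance(exception, torch.fx.proxy.TraceError):
            warnings.warn(f"Failed to trace {model.__class__.__name__}")

        warnings.warn("Falling back to layer_sequential pipeline")
        run_layer_sequential(model, dataloader)
        return True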
