
Commit

Merge branch 'mvafin/support_awq' of https://github.com/mvafin/optimum-intel into mvafin/support_awq
eaidova committed Dec 20, 2024
2 parents b0e4860 + 5d8bcb7 commit 630d36a
55 changes: 55 additions & 0 deletions tests/openvino/utils_tests.py
@@ -228,6 +228,61 @@ def get_num_quantized_nodes(model):
    return num_fake_quantize, num_weight_nodes


@contextmanager
def mock_torch_cuda_is_available(to_patch):
    original_is_available = torch.cuda.is_available
    if to_patch:
        torch.cuda.is_available = lambda: True
    try:
        yield
    finally:
        if to_patch:
            torch.cuda.is_available = original_is_available


@contextmanager
def patch_awq_for_inference(to_patch):
    orig_gemm_forward = None
    if to_patch:
        # patch GEMM module to allow inference without CUDA GPU
        from awq.modules.linear.gemm import WQLinearMMFunction
        from awq.utils.packing_utils import dequantize_gemm

        def new_forward(
            ctx,
            x,
            qweight,
            qzeros,
            scales,
            w_bit=4,
            group_size=128,
            bias=None,
            out_features=0,
        ):
            ctx.out_features = out_features

            out_shape = x.shape[:-1] + (out_features,)
            x = x.to(torch.float16)

            out = dequantize_gemm(qweight, qzeros, scales, w_bit, group_size)
            out = torch.matmul(x, out)

            out = out + bias if bias is not None else out
            out = out.reshape(out_shape)

            if len(out.shape) == 2:
                out = out.unsqueeze(0)
            return out

        orig_gemm_forward = WQLinearMMFunction.forward
        WQLinearMMFunction.forward = new_forward
    try:
        yield
    finally:
        if orig_gemm_forward is not None:
            WQLinearMMFunction.forward = orig_gemm_forward


def compare_num_quantized_nodes_per_model(
    test_case: unittest.TestCase,
    models: List[Union[ov.Model, OVBaseModel]],
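Not part of the commit, but a minimal sketch of how the two helpers added above might be combined in a test that exercises an AWQ-quantized checkpoint on a machine without a CUDA GPU. The import path, model id, and the use of transformers' AutoModelForCausalLM are placeholders/assumptions, not something this diff shows:

# Illustrative sketch only -- not part of this commit.
# `mock_torch_cuda_is_available` and `patch_awq_for_inference` are the helpers
# added above in tests/openvino/utils_tests.py; the model id is a placeholder.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from utils_tests import mock_torch_cuda_is_available, patch_awq_for_inference

MODEL_ID = "some-org/some-awq-quantized-model"  # placeholder

with mock_torch_cuda_is_available(True):
    # AWQ loading paths check torch.cuda.is_available(); pretend a GPU exists.
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
inputs = tokenizer("Hello", return_tensors="pt")

with patch_awq_for_inference(True):
    # WQLinearMMFunction.forward now dequantizes and runs a plain matmul on CPU.
    output_ids = model.generate(**inputs, max_new_tokens=8)

print(tokenizer.decode(output_ids[0]))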
