
Commit

Add a skip_checks option to OpQuantizationConfig.
PiperOrigin-RevId: 653687329
paulinesho authored and copybara-github committed Jul 18, 2024
1 parent 2d9afef commit 42300e1
Showing 4 changed files with 49 additions and 15 deletions.
@@ -2,6 +2,7 @@
 
 
 from typing import Any, Optional
+from absl import logging
 import numpy as np
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -28,6 +29,9 @@ def check_op_quantization_config(
   Raises:
     ValueError: If the op quantization config is invalid.
   """
+  if op_quant_config.skip_checks:
+    return
+
   if op_quant_config.weight_tensor_config.dtype != qtyping.TensorDataType.INT:
     raise ValueError(
         "Weights need to have integer type for min/max uniform quantization. If"
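
The guard added above short-circuits validation before any dtype or mode checks run. Below is a minimal, self-contained sketch of that behavior, using a simplified stand-in for the config class rather than the real qtyping types.

import dataclasses


@dataclasses.dataclass
class _ConfigSketch:
  """Illustrative stand-in for OpQuantizationConfig; fields are simplified."""
  weight_dtype: str = "INT"
  skip_checks: bool = False


def check_op_quantization_config_sketch(config: _ConfigSketch) -> None:
  """Raises on an unsupported config unless checks are explicitly skipped."""
  if config.skip_checks:
    return  # advanced users opt out of all validation
  if config.weight_dtype != "INT":
    raise ValueError(
        "Weights need to have integer type for min/max uniform quantization."
    )


# A config that would normally fail validation passes when checks are skipped.
check_op_quantization_config_sketch(
    _ConfigSketch(weight_dtype="FLOAT", skip_checks=True)
)
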
ai_edge_quantizer/calibrator.py (28 changes: 16 additions & 12 deletions)
@@ -104,12 +104,14 @@ def calibrate(
       for op in subgraph.operators:
         op_code = op_codes[op.opcodeIndex].builtinCode
         if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          raise ValueError(
-              "Full integer calibration requires all ops in the model to be"
-              " supported. Encounter unsupported op code: %s. Please add the"
-              " op to Algorithm Manager." % op_code
-          )
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
+          # raise ValueError(
+          #     "Full integer calibration requires all ops in the model to be"
+          #     " supported. Encounter unsupported op code: %s. Please add the"
+          #     " op to Algorithm Manager." % op_code
+          # )
+          op_key = -1
+        else:
+          op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
         # Step2.1: query the quantization_recipe to get op quantization
         # settings.
         op_scope = self._get_op_scope(op, subgraph.tensors)
@@ -208,12 +210,14 @@ def _initialize_model_qsvs(
       for subgraph_op_id, op in enumerate(subgraph.operators):
         op_code = op_codes[op.opcodeIndex].builtinCode
         if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          raise ValueError(
-              "Full integer calibration requires all ops in the model to be"
-              " supported. Encounter unsupported op code: %s. Please add the"
-              " op to Algorithm Manager." % op_code
-          )
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
+          # raise ValueError(
+          #     "Full integer calibration requires all ops in the model to be"
+          #     " supported. Encounter unsupported op code: %s. Please add the"
+          #     " op to Algorithm Manager." % op_code
+          # )
+          op_key = -1
+        else:
+          op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
         # Step1: query the quantization_recipe to get op quantization
         # settings.
         op_scope = self._get_op_scope(op, subgraph.tensors)
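
Both hunks replace the hard failure on unsupported ops with a sentinel op key. The sketch below illustrates the lookup-with-fallback pattern in isolation; the mapping contents are made up for the example, and only the -1 fallback comes from the diff.

# Illustrative subset; the real mapping lives in tfl_flatbuffer_utils.
TFL_OP_CODE_TO_NAME = {0: "ADD", 3: "CONV_2D", 9: "FULLY_CONNECTED"}


def op_key_for(op_code: int):
  """Returns the op name, or -1 as a sentinel for unsupported op codes."""
  if op_code not in TFL_OP_CODE_TO_NAME:
    return -1
  return TFL_OP_CODE_TO_NAME[op_code]


print(op_key_for(3))    # CONV_2D
print(op_key_for(999))  # -1 instead of the previous ValueError
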
ai_edge_quantizer/params_generator.py (26 changes: 23 additions & 3 deletions)
@@ -7,6 +7,7 @@
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import recipe_manager
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from pprint import pprint
 
 
 class ParamsGenerator:
@@ -229,6 +230,9 @@ def _check_buffer_sharing(self) -> None:
               ' buffer. Please modify your quantization recipe to make sure the'
               ' two tensors have the same quantization settings.'
           )
+        # print('..........................................')
+        # pprint(first_tensor_params)
+        # pprint(tensor_params)
         if not _compatible_tensor_transformation_params(
             first_tensor_params, tensor_params
         ):
@@ -287,23 +291,29 @@ def _compatible_tensor_transformation_params(
   """Check if two tensor transformation params are compatible."""
   if params1.producer is None or params2.producer is None:
     if params1.producer != params2.producer:
+      print('a')
       return False
   elif not _compatible_tensor_params(params1.producer, params2.producer):
+    print('b')
     return False
   if params1.consumers is None or params2.consumers is None:
     if params1.consumers != params2.consumers:
+      print('c')
       return False
   else:
     # Check all consumers within each params are compatible.
     for params1_consumer in params1.consumers:
       if not _compatible_tensor_params(params1_consumer, params1.consumers[0]):
+        print('d')
         return False
     for params2_consumer in params2.consumers:
       if not _compatible_tensor_params(params2_consumer, params2.consumers[0]):
+        print('e')
         return False
     if not _compatible_tensor_params(
         params1.consumers[0], params2.consumers[0]
     ):
+      print('f')
       return False
   return True
 
@@ -321,9 +331,18 @@ def _compatible_tensor_params(
       qtyping.QuantTransformation.QUANTIZE_TENSOR,
       qtyping.QuantTransformation.ADD_DEQUANTIZE,
   ]
-  if params1.parameters != params2.parameters:
-    return False
-  # We only need to check the first transformation because transformations are
+  if (
+      params1.transformations[0] != qtyping.QuantTransformation.NO_QUANTIZE
+      and params2.transformations[0] != qtyping.QuantTransformation.NO_QUANTIZE
+  ):
+    # NO_QUANTIZE has no parameters. So only if both params aren't NO_QUANTIZE
+    # do we expect the parameters to be the same.
+    if params1.parameters != params2.parameters:
+      print('i')
+      print(params1)
+      print(params2)
+      return False
+  # we only need to check the first transformation because transformations are
   # applied in order, and as long as the one that's immediately after the tensor
   # is the same, it's compatible.
   if (
@@ -336,4 +355,5 @@
       and params2.transformations[0] in quantized_source_transformations
   ):
     return True
+  print('ii')
   return False
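
The new branch in _compatible_tensor_params encodes a simple rule: NO_QUANTIZE carries no quantization parameters, so parameter equality is only required when both sides actually quantize. A self-contained sketch of that rule, with placeholder types standing in for the richer qtyping ones:

import enum
from typing import Any, Optional


class QuantTransformation(enum.Enum):
  NO_QUANTIZE = 0
  QUANTIZE_TENSOR = 1
  ADD_DEQUANTIZE = 2


def parameters_compatible(
    transform1: QuantTransformation,
    parameters1: Optional[Any],
    transform2: QuantTransformation,
    parameters2: Optional[Any],
) -> bool:
  """Compares parameters only when neither side is NO_QUANTIZE."""
  if (
      transform1 != QuantTransformation.NO_QUANTIZE
      and transform2 != QuantTransformation.NO_QUANTIZE
  ):
    return parameters1 == parameters2
  return True


# A NO_QUANTIZE side is compatible with anything, regardless of parameters.
print(parameters_compatible(
    QuantTransformation.NO_QUANTIZE, None,
    QuantTransformation.QUANTIZE_TENSOR, {"scale": 0.5},
))  # True
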
ai_edge_quantizer/qtyping.py (6 changes: 6 additions & 0 deletions)
@@ -246,6 +246,7 @@ class OpQuantizationConfig:
     weight_tensor_config: The quantization configuration for weight tensor in
       the op.
     execution_mode: How to execute the op after quantization.
+    skip_checks: Skip op quantization config checks.
   """
 
   # Quant config for activation tensors in the op (i.e., runtime tensors).
@@ -262,6 +263,11 @@
   # How to execute the op after quantization.
   execution_mode: OpExecutionMode = OpExecutionMode.WEIGHT_ONLY
 
+  # For advanced users only. If set, the quantizer will ignore all op
+  # configuration checks and forcefully quantize this op according to the user
+  # instructions even if it's not supported in the TFLite runtime.
+  skip_checks: bool = False
+
   def __post_init__(self):
     if self.activation_tensor_config is None:
       return
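
A hedged usage sketch of the new field: only weight_tensor_config, execution_mode, and skip_checks are confirmed by this diff, and how the weight tensor config itself is built is left to the caller.

from ai_edge_quantizer import qtyping


def make_forced_op_config(weight_config) -> qtyping.OpQuantizationConfig:
  """Builds an op config that bypasses check_op_quantization_config."""
  # `weight_config` is assumed to be a valid weight-tensor quantization
  # config built elsewhere; its exact type is not shown in this diff.
  return qtyping.OpQuantizationConfig(
      weight_tensor_config=weight_config,
      execution_mode=qtyping.OpExecutionMode.WEIGHT_ONLY,
      skip_checks=True,  # for advanced users: skip all op config checks
  )
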
