
Commit 6e78a69

Add full integer quantization for SLICE in Quantizer

PiperOrigin-RevId: 696277110
v-dziuba authored and copybara-github committed Nov 14, 2024
1 parent ef8ea08
Showing 8 changed files with 237 additions and 2 deletions.
2 changes: 2 additions & 0 deletions ai_edge_quantizer/algorithm_manager.py
@@ -89,6 +89,7 @@ class AlgorithmName(str, enum.Enum):
        _TFLOpName.STRIDED_SLICE,
        _TFLOpName.SPLIT,
        _TFLOpName.LOGISTIC,  # Sigmoid
        _TFLOpName.SLICE,
    ),
    (
        naive_min_max_quantize.materialize_input,
@@ -114,6 +115,7 @@ class AlgorithmName(str, enum.Enum):
        naive_min_max_quantize.materialize_strided_slice,
        naive_min_max_quantize.materialize_split,
        naive_min_max_quantize.materialize_softmax_and_logistic,
        naive_min_max_quantize.materialize_slice,
    ),
):
  register_quantized_op(
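For context, the two tuples patched above are index-aligned and zipped together, so supporting a new op means adding one entry to each at matching positions. A minimal self-contained sketch of that pattern (every name below is an illustrative stand-in, not the actual algorithm_manager API):

_MATERIALIZE_REGISTRY = {}  # hypothetical registry, for illustration only

def register_quantized_op(op_name, materialize_func):
  # Map a TFLite op name to the function that quantizes its tensors.
  _MATERIALIZE_REGISTRY[op_name] = materialize_func

op_names = ("STRIDED_SLICE", "SPLIT", "LOGISTIC", "SLICE")
materialize_funcs = (
    "materialize_strided_slice",  # strings stand in for the real callables
    "materialize_split",
    "materialize_softmax_and_logistic",
    "materialize_slice",
)
for op_name, func in zip(op_names, materialize_funcs):
  register_quantized_op(op_name, func)

assert _MATERIALIZE_REGISTRY["SLICE"] == "materialize_slice"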
@@ -307,6 +307,21 @@ def _are_weights_too_small(
)


def materialize_slice(
    op_info: qtyping.OpInfo,
    graph_info: qtyping.GraphInfo,
    tensor_name_to_qsv: dict[str, Any],
) -> list[qtyping.TensorTransformationParams]:
  """Materialize tensors in tfl.slice."""
  return utils.materialize_standard_op(
      op_info,
      graph_info,
      tensor_name_to_qsv,
      constraint=_OpQuantConstraint.SAME_AS_INPUT_SCALE,
      inputs_to_ignore=[1, 2],  # Begin and size do not need to be quantized.
  )
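The SAME_AS_INPUT_SCALE constraint is what makes this materialization lossless: slice only selects elements, so reusing the input's scale and zero point means the quantized output is exactly a slice of the quantized input. A toy numpy illustration of that property (not the Quantizer's implementation):

import numpy as np

scale, zero_point = 0.05, 3  # arbitrary example quantization parameters
x = np.random.uniform(-10, 8, size=(4, 6)).astype(np.float32)
# Quantize to int8 with the input's parameters.
q = np.clip(np.round(x / scale) + zero_point, -128, 127).astype(np.int8)
# Slicing in the quantized domain, then dequantizing...
out_a = (q[1:3, 0:4].astype(np.float32) - zero_point) * scale
# ...matches dequantizing first and slicing after: no extra error is added.
out_b = ((q.astype(np.float32) - zero_point) * scale)[1:3, 0:4]
assert np.array_equal(out_a, out_b)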


def materialize_fc_conv(
    op_info: qtyping.OpInfo,
    graph_info: qtyping.GraphInfo,
@@ -0,0 +1,110 @@
# Copyright 2024 The AI Edge Quantizer Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os

from absl.testing import parameterized
import numpy as np

from tensorflow.python.platform import googletest
from ai_edge_quantizer import qtyping
from ai_edge_quantizer.algorithms.uniform_quantize import naive_min_max_quantize
from ai_edge_quantizer.algorithms.uniform_quantize.naive_min_max_quantize_op_tests import test_utils as naive_min_max_test_utils
from ai_edge_quantizer.utils import test_utils
from ai_edge_quantizer.utils import tfl_flatbuffer_utils

_TFLOpName = qtyping.TFLOperationName
_ComputePrecision = qtyping.ComputePrecision
_TensorQuantConfig = qtyping.TensorQuantizationConfig
_QuantTransformation = qtyping.QuantTransformation
_OpTestInfo = naive_min_max_test_utils.OpTestInfo

_TEST_DATA_PREFIX_PATH = test_utils.get_path_to_datafile(
    "../../../tests/models"
)
_DEFAULT_ACTIVATION_QUANT_SETTING = (
    naive_min_max_test_utils.DEFAULT_ACTIVATION_QUANT_SETTING
)
_DEFAULT_WEIGHT_QUANT_SETTING = (
    naive_min_max_test_utils.DEFAULT_WEIGHT_QUANT_SETTING
)


class SliceTest(naive_min_max_test_utils.NaiveMinMaxQuantizeTest):

  def setUp(self):
    super().setUp()
    np.random.seed(666)
    self._test_model_path = os.path.join(
        _TEST_DATA_PREFIX_PATH, "single_slice.tflite"
    )
    self._op_test_info = _OpTestInfo(
        test_model=tfl_flatbuffer_utils.read_model(self._test_model_path),
        op_tensor_names={},
        input_range=(np.array([[-10]]), np.array([[8]])),
        output_range=(np.array([[10]]), np.array([[88]])),
    )
    # The test model has one subgraph for now.
    self._graph_info = qtyping.GraphInfo(
        subgraph_tensors=self._op_test_info.test_model.subgraphs[0].tensors,
        buffers=self._op_test_info.test_model.buffers,
    )

  @parameterized.parameters(
      (_DEFAULT_ACTIVATION_QUANT_SETTING),
      (
          _TensorQuantConfig(
              num_bits=16,
              symmetric=True,
              granularity=qtyping.QuantGranularity.TENSORWISE,
          )
      ),
  )
  def test_materialize_slice_succeeds(self, activation_tensor_config):
    op_quant_config = qtyping.OpQuantizationConfig(
        activation_tensor_config=activation_tensor_config,
        weight_tensor_config=_DEFAULT_WEIGHT_QUANT_SETTING,
        compute_precision=_ComputePrecision.INTEGER,  # SRQ.
    )
    # Read from Model Explorer.
    subgraph0 = self._op_test_info.test_model.subgraphs[0]
    subgraph_op_id = 0
    op = subgraph0.operators[subgraph_op_id]
    op_info = qtyping.OpInfo(
        op=op,
        op_name=qtyping.TFLOperationName.SLICE,
        subgraph_op_index=subgraph_op_id,
        op_quant_config=op_quant_config,
    )

    # Test settings.
    op_tensor_names = {}
    op_tensor_names["input"] = "slice_input_tensor:0"
    op_tensor_names["input2"] = "slice_begin:0"
    op_tensor_names["input3"] = "slice_size:0"
    op_tensor_names["output"] = "PartitionedCall:0"
    self._op_test_info.op_tensor_names = op_tensor_names
    self._test_no_weights_op(
        op_info,
        self._graph_info,
        self._op_test_info,
        naive_min_max_quantize.materialize_slice,
        same_input_output_params=True,
        inputs_to_ignore=[1, 2],  # Begin and size tensors.
    )


if __name__ == "__main__":
  googletest.main()
6 changes: 4 additions & 2 deletions ai_edge_quantizer/default_policy.py
@@ -163,7 +163,8 @@
"TANH",
"TRANSPOSE",
"INPUT",
"OUTPUT"
"OUTPUT",
"SLICE"
],
"static_wi8_ai8": [
"ADD",
@@ -187,7 +188,8 @@
"TANH",
"TRANSPOSE",
"INPUT",
"OUTPUT"
"OUTPUT",
"SLICE"
],
"static_wi4_ai8": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
"static_wi4_ai16": ["FULLY_CONNECTED", "CONV_2D", "INPUT", "OUTPUT"],
1 change: 1 addition & 0 deletions ai_edge_quantizer/qtyping.py
@@ -57,6 +57,7 @@ class TFLOperationName(str, enum.Enum):
  STRIDED_SLICE = 'STRIDED_SLICE'
  SPLIT = 'SPLIT'
  LOGISTIC = 'LOGISTIC'
  SLICE = 'SLICE'


class QuantizeMode(enum.Enum):
104 changes: 104 additions & 0 deletions ai_edge_quantizer/tests/end_to_end_tests/slice_test.py
@@ -0,0 +1,104 @@
# Copyright 2024 The AI Edge Quantizer Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""E2E tests for the quantizer for a model with slice."""

from absl.testing import parameterized
import numpy as np

from tensorflow.python.platform import googletest
from ai_edge_quantizer import qtyping
from ai_edge_quantizer import quantizer
from ai_edge_quantizer.utils import test_utils

_OpExecutionMode = qtyping.OpExecutionMode
_OpName = qtyping.TFLOperationName
_TensorQuantConfig = qtyping.TensorQuantizationConfig
_OpQuantConfig = qtyping.OpQuantizationConfig

_RNG = np.random.default_rng(66)


def _get_dummy_data(num_samples):
  data = []
  for _ in range(num_samples):
    data.append({
        'input_tensor': _RNG.uniform(size=(32, 24, 32)).astype(np.float32),
        'begin': np.array([1, 0, 0], dtype=np.int32),
        'size': np.array([16, 8, 16], dtype=np.int32),
    })
  return data


def _get_calibration_data(num_samples: int = 64):
  calibration_samples = _get_dummy_data(num_samples)
  calibration_data = {
      'slice': calibration_samples,
  }
  return calibration_data


def _get_test_data(num_samples: int = 8):
  return _get_calibration_data(num_samples)


class SliceTest(parameterized.TestCase):

  def _custom_setup(self, test_model_file):
    super().setUp()
    self.float_model_path = test_utils.get_path_to_datafile(
        f'../models/{test_model_file}'
    )
    self._quantizer = quantizer.Quantizer(self.float_model_path)

  @parameterized.parameters(
      '../../recipes/default_a8w8_recipe.json',
      '../../recipes/default_a16w8_recipe.json',
  )
  def test_slice_model_full_integer(self, recipe_path):
    self._custom_setup('single_slice.tflite')
    recipe_path = test_utils.get_path_to_datafile(recipe_path)
    self._quantizer.load_quantization_recipe(recipe_path)
    self.assertTrue(self._quantizer.need_calibration)
    calibration_result = self._quantizer.calibrate(_get_calibration_data())
    _ = self._quantizer.quantize(calibration_result).export_model(
        '/tmp/slice_quantized.tflite'
    )
    # Skip the model size check: the model has no weights, so its size does
    # not decrease after quantization.

    comparison_result = self._quantizer.validate(
        error_metrics='mse', test_data=_get_test_data(num_samples=1)
    )
    self._check_comparison_result(
        comparison_result,
        output_tolerance=1e-4,
    )

  # TODO: b/345503484 - Check weight tensor type of the quantized model.
  def _check_comparison_result(
      self,
      comparison_result,
      output_tolerance,
  ):
    # TODO: b/357959309 - Use comparison result directly for testing.
    comparison_result = comparison_result.get_all_tensor_results()
    # Check the final output.
    output_mse = comparison_result['PartitionedCall:0']
    self.assertLess(output_mse, output_tolerance)


if __name__ == '__main__':
  googletest.main()
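For reference, the quantization flow exercised by this test condenses to the following sketch. It uses only calls that appear in the test above; the paths are illustrative:

from ai_edge_quantizer import quantizer

qt = quantizer.Quantizer('single_slice.tflite')  # float model with a SLICE op
qt.load_quantization_recipe('default_a8w8_recipe.json')
# Full integer quantization needs activation ranges, so calibrate first.
calibration_result = qt.calibrate(_get_calibration_data())
quant_result = qt.quantize(calibration_result)
quant_result.export_model('/tmp/slice_quantized.tflite')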
Binary file not shown.
1 change: 1 addition & 0 deletions ai_edge_quantizer/utils/tfl_flatbuffer_utils.py
@@ -59,6 +59,7 @@
    _TFLOpName.STRIDED_SLICE: schema_py_generated.BuiltinOperator.STRIDED_SLICE,
    _TFLOpName.SPLIT: schema_py_generated.BuiltinOperator.SPLIT,
    _TFLOpName.LOGISTIC: schema_py_generated.BuiltinOperator.LOGISTIC,
    _TFLOpName.SLICE: schema_py_generated.BuiltinOperator.SLICE,
})

TFL_OP_CODE_TO_NAME = immutabledict.immutabledict(
