
Commit

Add a skip_checks option to OpQuantizationConfig.
PiperOrigin-RevId: 653687329
paulinesho authored and copybara-github committed Jul 18, 2024
1 parent 2d9afef commit 42300e1
Showing 4 changed files with 49 additions and 15 deletions.
@@ -2,6 +2,7 @@
 
 
 from typing import Any, Optional
+from absl import logging
 import numpy as np
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer.algorithms.uniform_quantize import uniform_quantize_tensor
@@ -28,6 +29,9 @@ def check_op_quantization_config(
   Raises:
     ValueError: If the op quantization config is invalid.
   """
+  if op_quant_config.skip_checks:
+    return
+
   if op_quant_config.weight_tensor_config.dtype != qtyping.TensorDataType.INT:
     raise ValueError(
         "Weights need to have integer type for min/max uniform quantization. If"
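
The guard added above short-circuits validation before any dtype or mode checks run. Below is a minimal, self-contained sketch of that behavior, using a simplified stand-in for the config class rather than the real qtyping types.

import dataclasses


@dataclasses.dataclass
class _ConfigSketch:
  """Illustrative stand-in for OpQuantizationConfig; fields are simplified."""
  weight_dtype: str = "INT"
  skip_checks: bool = False


def check_op_quantization_config_sketch(config: _ConfigSketch) -> None:
  """Raises on an unsupported config unless checks are explicitly skipped."""
  if config.skip_checks:
    return  # advanced users opt out of all validation
  if config.weight_dtype != "INT":
    raise ValueError(
        "Weights need to have integer type for min/max uniform quantization."
    )


# A config that would normally fail validation passes when checks are skipped.
check_op_quantization_config_sketch(
    _ConfigSketch(weight_dtype="FLOAT", skip_checks=True)
)
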
ai_edge_quantizer/calibrator.py (28 changes: 16 additions & 12 deletions)
@@ -104,12 +104,14 @@ def calibrate(
       for op in subgraph.operators:
         op_code = op_codes[op.opcodeIndex].builtinCode
         if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          raise ValueError(
-              "Full integer calibration requires all ops in the model to be"
-              " supported. Encounter unsupported op code: %s. Please add the"
-              " op to Algorithm Manager." % op_code
-          )
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
+          # raise ValueError(
+          #     "Full integer calibration requires all ops in the model to be"
+          #     " supported. Encounter unsupported op code: %s. Please add the"
+          #     " op to Algorithm Manager." % op_code
+          # )
+          op_key = -1
+        else:
+          op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
         # Step2.1: query the quantization_recipe to get op quantization
         # settings.
         op_scope = self._get_op_scope(op, subgraph.tensors)
@@ -208,12 +210,14 @@ def _initialize_model_qsvs(
       for subgraph_op_id, op in enumerate(subgraph.operators):
         op_code = op_codes[op.opcodeIndex].builtinCode
         if op_code not in tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME:
-          raise ValueError(
-              "Full integer calibration requires all ops in the model to be"
-              " supported. Encounter unsupported op code: %s. Please add the"
-              " op to Algorithm Manager." % op_code
-          )
-        op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
+          # raise ValueError(
+          #     "Full integer calibration requires all ops in the model to be"
+          #     " supported. Encounter unsupported op code: %s. Please add the"
+          #     " op to Algorithm Manager." % op_code
+          # )
+          op_key = -1
+        else:
+          op_key = tfl_flatbuffer_utils.TFL_OP_CODE_TO_NAME[op_code]
         # Step1: query the quantization_recipe to get op quantization
         # settings.
         op_scope = self._get_op_scope(op, subgraph.tensors)
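
Both hunks replace the hard failure on unsupported ops with a sentinel op key. The sketch below illustrates the lookup-with-fallback pattern in isolation; the mapping contents are made up for the example, and only the -1 fallback comes from the diff.

# Illustrative subset; the real mapping lives in tfl_flatbuffer_utils.
TFL_OP_CODE_TO_NAME = {0: "ADD", 3: "CONV_2D", 9: "FULLY_CONNECTED"}


def op_key_for(op_code: int):
  """Returns the op name, or -1 as a sentinel for unsupported op codes."""
  if op_code not in TFL_OP_CODE_TO_NAME:
    return -1
  return TFL_OP_CODE_TO_NAME[op_code]


print(op_key_for(3))    # CONV_2D
print(op_key_for(999))  # -1 instead of the previous ValueError
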
ai_edge_quantizer/params_generator.py (26 changes: 23 additions & 3 deletions)
@@ -7,6 +7,7 @@
 from ai_edge_quantizer import qtyping
 from ai_edge_quantizer import recipe_manager
 from ai_edge_quantizer.utils import tfl_flatbuffer_utils
+from pprint import pprint
 
 
 class ParamsGenerator:
@@ -229,6 +230,9 @@ def _check_buffer_sharing(self) -> None:
               ' buffer. Please modify your quantization recipe to make sure the'
               ' two tensors have the same quantization settings.'
           )
+        # print('..........................................')
+        # pprint(first_tensor_params)
+        # pprint(tensor_params)
         if not _compatible_tensor_transformation_params(
             first_tensor_params, tensor_params
         ):
@@ -287,23 +291,29 @@ def _compatible_tensor_transformation_params(
   """Check if two tensor transformation params are compatible."""
   if params1.producer is None or params2.producer is None:
     if params1.producer != params2.producer:
+      print('a')
       return False
   elif not _compatible_tensor_params(params1.producer, params2.producer):
+    print('b')
     return False
   if params1.consumers is None or params2.consumers is None:
     if params1.consumers != params2.consumers:
+      print('c')
       return False
   else:
     # Check all consumers within each params are compatible.
     for params1_consumer in params1.consumers:
       if not _compatible_tensor_params(params1_consumer, params1.consumers[0]):
+        print('d')
         return False
     for params2_consumer in params2.consumers:
       if not _compatible_tensor_params(params2_consumer, params2.consumers[0]):
+        print('e')
         return False
     if not _compatible_tensor_params(
         params1.consumers[0], params2.consumers[0]
     ):
+      print('f')
       return False
   return True
 
@@ -321,9 +331,18 @@ def _compatible_tensor_params(
       qtyping.QuantTransformation.QUANTIZE_TENSOR,
       qtyping.QuantTransformation.ADD_DEQUANTIZE,
   ]
-  if params1.parameters != params2.parameters:
-    return False
-  # We only need to check the first transformation because transformations are
+  if (
+      params1.transformations[0] != qtyping.QuantTransformation.NO_QUANTIZE
+      and params2.transformations[0] != qtyping.QuantTransformation.NO_QUANTIZE
+  ):
+    # NO_QUANTIZE has no parameters. So only if both params aren't NO_QUANTIZE
+    # do we expect the parameters to be the same.
+    if params1.parameters != params2.parameters:
+      print('i')
+      print(params1)
+      print(params2)
+      return False
+  # we only need to check the first transformation because transformations are
   # applied in order, and as long as the one that's immediately after the tensor
   # is the same, it's compatible.
   if (
@@ -336,4 +355,5 @@
       and params2.transformations[0] in quantized_source_transformations
   ):
     return True
+  print('ii')
   return False
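
The new branch in _compatible_tensor_params encodes a simple rule: NO_QUANTIZE carries no quantization parameters, so parameter equality is only required when both sides actually quantize. A self-contained sketch of that rule, with placeholder types standing in for the richer qtyping ones:

import enum
from typing import Any, Optional


class QuantTransformation(enum.Enum):
  NO_QUANTIZE = 0
  QUANTIZE_TENSOR = 1
  ADD_DEQUANTIZE = 2


def parameters_compatible(
    transform1: QuantTransformation,
    parameters1: Optional[Any],
    transform2: QuantTransformation,
    parameters2: Optional[Any],
) -> bool:
  """Compares parameters only when neither side is NO_QUANTIZE."""
  if (
      transform1 != QuantTransformation.NO_QUANTIZE
      and transform2 != QuantTransformation.NO_QUANTIZE
  ):
    return parameters1 == parameters2
  return True


# A NO_QUANTIZE side is compatible with anything, regardless of parameters.
print(parameters_compatible(
    QuantTransformation.NO_QUANTIZE, None,
    QuantTransformation.QUANTIZE_TENSOR, {"scale": 0.5},
))  # True
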
ai_edge_quantizer/qtyping.py (6 changes: 6 additions & 0 deletions)
@@ -246,6 +246,7 @@ class OpQuantizationConfig:
     weight_tensor_config: The quantization configuration for weight tensor in
       the op.
     execution_mode: How to execute the op after quantization.
+    skip_checks: Skip op quantization config checks.
   """
 
   # Quant config for activation tensors in the op (i.e., runtime tensors).
@@ -262,6 +263,11 @@
   # How to execute the op after quantization.
   execution_mode: OpExecutionMode = OpExecutionMode.WEIGHT_ONLY
 
+  # For advanced users only. If set, the quantizer will ignore all op
+  # configuration checks and forcefully quantize this op according to the user
+  # instructions even if it's not supported in the TFLite runtime.
+  skip_checks: bool = False
+
   def __post_init__(self):
     if self.activation_tensor_config is None:
       return
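
A hedged usage sketch of the new field: only weight_tensor_config, execution_mode, and skip_checks are confirmed by this diff, and how the weight tensor config itself is built is left to the caller.

from ai_edge_quantizer import qtyping


def make_forced_op_config(weight_config) -> qtyping.OpQuantizationConfig:
  """Builds an op config that bypasses check_op_quantization_config."""
  # `weight_config` is assumed to be a valid weight-tensor quantization
  # config built elsewhere; its exact type is not shown in this diff.
  return qtyping.OpQuantizationConfig(
      weight_tensor_config=weight_config,
      execution_mode=qtyping.OpExecutionMode.WEIGHT_ONLY,
      skip_checks=True,  # for advanced users: skip all op config checks
  )
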
