From 0765ec44135debde756548b0932f5ccce12da8b5 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 11:00:56 -0500 Subject: [PATCH 01/62] Add needed layer types for QONNX --- hls4ml/model/layers.py | 136 +++++++++++++++++++++++- hls4ml/model/optimizer/passes/qkeras.py | 30 +----- 2 files changed, 132 insertions(+), 34 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d9da2cc741..6a23a9b934 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -333,7 +333,7 @@ class Input(Layer): def initialize(self): shape = self.attributes['input_shape'] if shape[0] is None: - shape = shape[1:] + raise RuntimeError(f"Unexpectedly have a None in {shape=} of Input layer") dims = [f'N_INPUT_{i}_{self.index}' for i in range(1, len(shape) + 1)] if self.index == 1: default_type_name = 'input_t' @@ -344,6 +344,41 @@ def initialize(self): self.add_output_variable(shape, dims, var_name=self.name, type_name=type_name, precision=precision) +class Constant(Layer): + _expected_attributes = [ + Attribute('value', value_type=np.ndarray), + ] + + def initialize(self): + value = self.attributes['value'] + self.value = value # note, this is unquantized; Only here for easier access + shape = value.shape + if not shape: + shape = (1,) + self.value = np.array([self.value]) + dims = [f'{self.name}_{i}' for i in range(len(shape))] + self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) + + +class Quant(Layer): # The QONNX quantization layer + """ + This is a QONNX quantization layer. Optimizations should convert it + before HLS is produced. 
+ """ + + _expected_attributes = [ + Attribute('narrow', value_type=bool), + Attribute('rounding_mode', value_type=str), + Attribute('signed', value_type=bool), + ] + + def initialize(self): + inp = self.get_input_variable(self.inputs[0]) + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + class Reshape(Layer): _expected_attributes = [ Attribute('target_shape', value_type=typing.Sequence), @@ -351,19 +386,20 @@ class Reshape(Layer): def initialize(self): input_shape = self.get_input_variable(self.inputs[0]).shape - target_shape = self.get_attr('target_shape') + target_shape = self.get_attr('target_shape') # this should not have a batch dimension if target_shape is None: # need to get it from the input shape_node = self.get_input_node(self.inputs[1]) # for QONNX, remove batch dimension + # (onnx cleaning should have removed reshape dimension) if shape_node: target_shape = shape_node.value[1:] else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") - # remove Nones -- is this ever triggered? + # nones should not exist here if target_shape[0] is None: - target_shape = target_shape[1:] + raise RuntimeError(f"Unexpectedly have a None in {target_shape=}") # take care of -1 shapes shape = self._infer_output_shape(input_shape, target_shape) @@ -395,7 +431,7 @@ class Dense(Layer): ] def initialize(self): - shape = self.get_input_variable().shape[:] + shape = list(self.get_input_variable().shape) shape[-1] = self.attributes['n_out'] if len(shape) > 1: dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] @@ -406,6 +442,27 @@ def initialize(self): self.add_bias(quantizer=self.get_attr('bias_quantizer')) +class Conv(Layer): + """ + This is for the ONNX Conv node. Currently, it is only supported as an intermediate + form that gets converted to an explicit ConvXD. + + Note: these are always channels-last. 
+ """ + + def initialize(self): + # use negative indexing because it is not clear if batch dimension is always stripped + if self.attributes['n_dim'] == 1: + # this is 1D convolution + shape = [self.attributes['out_width'], self.attributes['n_filt']] + dims = [f'N_OUTPUTS_{self.index}', f'N_FILT_{self.index}'] + else: + shape = [self.attributes['out_height'], self.attributes['out_width'], self.attributes['n_filt']] + dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_FILT_{self.index}'] + + self.add_output_variable(shape, dims) + + class Conv1D(Layer): _expected_attributes = [ Attribute('in_width'), @@ -811,6 +868,19 @@ def initialize(self): super().initialize() +class BatchNormOnnx(Layer): + ''' + A transient layer formed from ONNX BatchNormalization that gets converted to + BatchNormalization after the scale and bias are determined + ''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), @@ -841,6 +911,31 @@ def initialize(self): self.add_weights_variable(name='bias', var_name='b{index}', data=bias) +class ApplyAlpha(BatchNormalization): + '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' + Inference computation uses BatchNormalization methods''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + + scale = self.get_attr('scale_data') + scale_quantizer = self.get_attr('scale_quantizer') + bias = self.get_attr('bias_data') + bias_quantizer = self.get_attr('bias_quantizer') + + self.add_weights(scale, quantizer=scale_quantizer) + self.add_bias(bias, quantizer=bias_quantizer) + + def add_weights(self, scale, quantizer=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + + def add_bias(self, bias, 
quantizer=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) + + class Merge(Layer): def initialize(self): assert len(self.inputs) == 2 @@ -855,6 +950,31 @@ def initialize(self): self.add_output_variable(shape, dims) +class MatMul(Layer): + """ + This is a matrix multiply. Currently, it is only supported as an intermediate + form that gets converted to a Dense layer. + """ + + def initialize(self): + assert len(self.inputs) == 2 + inp1 = self.get_input_variable(self.inputs[0]) + inp2 = self.get_input_variable(self.inputs[1]) + if len(inp2.shape) == 1: + # mat vec multiply + assert inp1.shape[-1] == inp2.shape[0] + shape = tuple(inp1.shape[:-1]) + (inp2.shape[0],) + else: + assert inp1.shape[-1] == inp2.shape[-2] + shape = tuple(inp1.shape[:-1]) + (inp2.shape[-1],) + if len(shape) > 1: + dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] + else: + dims = [f'N_LAYER_{self.index}'] + + self.add_output_variable(shape, dims) + + class Dot(Merge): def initialize(self): assert len(self.inputs) == 2 @@ -1293,6 +1413,7 @@ def initialize(self): layer_map = { 'Input': Input, 'InputLayer': Input, + 'Constant': Constant, 'Activation': Activation, 'QActivation': Activation, 'LeakyReLU': ParametrizedActivation, @@ -1307,6 +1428,7 @@ def initialize(self): 'BinaryDense': Dense, 'TernaryDense': Dense, 'QDense': Dense, + 'Conv': Conv, 'Conv1D': Conv1D, 'QConv1D': Conv1D, 'Conv2D': Conv2D, @@ -1329,6 +1451,7 @@ def initialize(self): 'ZeroPadding1D': ZeroPadding1D, 'ZeroPadding2D': ZeroPadding2D, 'Merge': Merge, + 'MatMul': MatMul, 'Dot': Dot, 'Concatenate': Concatenate, 'Resize': Resize, @@ -1341,6 +1464,9 @@ def initialize(self): 'GRU': GRU, 'GarNet': GarNet, 'GarNetStack': GarNetStack, + 'Quant': Quant, + 'ApplyAlpha': ApplyAlpha, + 'BatchNormOnnx': BatchNormOnnx, 'LayerGroup': LayerGroup, # TensorFlow-specific layers: 'BiasAdd': BiasAdd, diff --git a/hls4ml/model/optimizer/passes/qkeras.py 
b/hls4ml/model/optimizer/passes/qkeras.py index cdbb56ec46..2d2b6b0f77 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -1,7 +1,7 @@ import numpy as np import tensorflow as tf -from hls4ml.model.layers import BatchNormalization, register_layer +from hls4ml.model.layers import ApplyAlpha, BatchNormalization from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer @@ -76,35 +76,7 @@ def precision_string_modify(self, pstr): return pstr -class ApplyAlpha(BatchNormalization): - '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' - Inference computation uses BatchNormalization methods''' - - def initialize(self): - inp = self.get_input_variable() - shape = inp.shape - dims = inp.dim_names - self.add_output_variable(shape, dims) - - scale = self.get_attr('scale_data') - scale_quantizer = self.get_attr('scale_quantizer') - bias = self.get_attr('bias_data') - bias_quantizer = self.get_attr('bias_quantizer') - - self.add_weights(scale, quantizer=scale_quantizer) - self.add_bias(bias, quantizer=bias_quantizer) - - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) - - def add_bias(self, bias, quantizer=None): - self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) - - def register_qkeras(): - # Register the layer types to the layer map - register_layer('ApplyAlpha', ApplyAlpha) - # Register the optimization passes register_pass('output_rounding_saturation_mode', OutputRoundingSaturationMode) register_pass('qkeras_factorize_alpha', QKerasFactorizeAlpha) From ff788eae9a541e88c74e0876d405a487537632cc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 19:32:13 -0500 Subject: [PATCH 02/62] add qonnx pytest --- 
test/pytest/test_qonnx.py | 189 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100755 test/pytest/test_qonnx.py diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py new file mode 100755 index 0000000000..be567d81f9 --- /dev/null +++ b/test/pytest/test_qonnx.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +import os +import urllib +from pathlib import Path + +import numpy as np +import pytest +import qonnx.core.onnx_exec as oxe +import qonnx.util.cleanup +import qonnx.util.to_channels_last + +# To conveniently run QONNX inference +from qonnx.core.modelwrapper import ModelWrapper + +import hls4ml + +test_root_path = Path(__file__).parent + + +def test_tfc_2w2a(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 1, 28, 28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a'), 
part='xcu250-figd2104-2L-e', hls_config=config + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +def test_tfc_2w2a_quartus(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") + tfc_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + ) + urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 1, 28, 28) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + config['LayerName'] = {} + config['LayerName']['global_in'] = {'Precision': 'ac_fixed<16,2>'} + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a-quartus'), + part='Arria10', + backend='Quartus', + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +def test_cnv_2w2a(): + # download test model + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") + cnv_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" + 
) + urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_clean = str(dl_dir / "qonnx-cnv-2w2a-clean.onnx") + out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") + out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) + qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) + qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 32, 32, 3) + np.random.seed(1) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + config['Model']['Precision'] = 'ap_fixed<32,16>' + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / 'hls4mlprj_qonnx_cnv-2w2a'), + part='xcu250-figd2104-2L-e', + io_type='io_stream', + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) +def test_jet_tagging(backend): + # download test model + dl_dir = test_root_path + dl_file = dl_dir / "qkeras_jettagging.onnx" + jet_tagging_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" + ) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_file = 
dl_dir / "qkeras_jettagging-clean.onnx" + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + # Execute QONNX model inference + # TODO make the test bigger + ishape = (1, 16) + np.random.seed(0) + X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + # Convert QONNX model, compile, and run inference + config = hls4ml.utils.config_from_onnx_model(model) + # Some hand-derived config + # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +if __name__ == '__main__': + test_tfc_2w2a() From cda7208675c85ffadbcde4ce873521bf9187d7c1 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 11 Jul 2023 19:41:59 -0500 Subject: [PATCH 03/62] first migration of onnx parsing --- hls4ml/converters/onnx/convolution.py | 127 +++++++------- hls4ml/converters/onnx/core.py | 103 ++++++------ hls4ml/converters/onnx/merge.py | 24 +-- hls4ml/converters/onnx/pooling.py | 84 ++++------ hls4ml/converters/onnx/reshape.py | 37 ++--- hls4ml/converters/onnx_to_hls.py | 227 ++++++++++---------------- 6 files changed, 258 insertions(+), 344 deletions(-) diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 39b2232169..85dc0ca804 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -1,85 +1,72 @@ -from hls4ml.converters.onnx_to_hls import ( - compute_pads_1d, - compute_pads_2d, - get_onnx_attribute, - get_onnx_input_name, - onnx_handler, -) -from 
hls4ml.converters.utils import compute_padding_1d, compute_padding_2d +import numpy as np + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler @onnx_handler('Conv') -def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_conv_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['data_format'] = 'channels_first' # ONNX's default is channel first - layer['inputs'] = get_onnx_input_name(node, graph) - reader.add_input(layer['name'], node.input) + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['inputs'] = input_names + layer['outputs'] = node.output strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') - - if len(input_shapes[0]) == 3: # Conv1D - layer['class_name'] = 'Conv1D' - - layer['in_width'] = input_shapes[0][2] - layer['n_chan'] = input_shapes[0][1] - layer['filt_width'] = kernel_shape[0] - layer['n_filt'] = reader.get_weights_data(layer['name'], 'kernel').shape[2] - layer['stride_width'] = strides[0] - pads = compute_pads_1d(node, layer) - + # Note: currently don't have support for auto_pad. 
+ pads = get_onnx_attribute(node, 'pads') + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1] * len(layer['kernel_shape']) + + if get_onnx_attribute(node, 'group') != 1: + raise ValueError("Only 1 group supported corrently") + + layer['in_width'] = input_shapes[0][-2] + layer['n_chan'] = input_shapes[0][-1] + layer['n_filt'] = input_shapes[1][0] + + layer['n_dim'] = len(input_shapes[0]) - 2 # 2 comes from channels and batch dimentions + if layer['n_dim'] not in (1, 2): + raise ValueError("Only 1D and 2D convolutions are supported") + layer['class_name'] = 'Conv' + + # set some values needed later + if layer['n_dim'] == 1: + # this is 1D convolution + full_width = layer['in_width'] + pads[0] + pads[1] + eff_kernel_width = kernel_shape[0] * dilations[0] + layer['out_width'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0])) + # for compatibility interpret some variables layer['pad_left'] = pads[0] layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['out_width'], _, _) = compute_padding_1d( - layer['padding'], layer['in_width'], layer['stride_width'], layer['filt_width'] - ) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_width']] - - elif len(input_shapes[0]) == 4: # Conv2D - layer['class_name'] = 'Conv2D' - - layer['in_height'] = input_shapes[0][2] - layer['in_width'] = input_shapes[0][3] - layer['n_chan'] = input_shapes[0][1] - + layer['filt_width'] = kernel_shape[0] + layer['stride_width'] = strides[0] + layer['dilation_width'] = dilations[0] + else: + # 2d + layer['in_height'] = input_shapes[0][-3] + full_height = layer['in_height'] + pads[0] + pads[2] + eff_kernel_height = kernel_shape[0] * dilations[0] + out_height = int(np.ceil((full_height - eff_kernel_height + 1) / strides[0])) + layer['out_height'] = out_height + + full_width = input_shapes[0][-2] + pads[1] + 
pads[3] + eff_kernel_width = kernel_shape[1] * dilations[1] + out_width = int(np.ceil((full_width - eff_kernel_width + 1) / strides[1])) + layer['out_width'] = out_width + # for compatibility interpret some variables + layer['pad_top'] = pads[0] + layer['pad_left'] = pads[1] + layer['pad_bottom'] = pads[2] + layer['pad_right'] = pads[3] layer['filt_height'] = kernel_shape[0] layer['filt_width'] = kernel_shape[1] - - layer['n_filt'] = next( - (x.type.tensor_type.shape.dim[1].dim_value for x in graph.value_info if x.name == node.output[0]), None - ) layer['stride_height'] = strides[0] layer['stride_width'] = strides[1] - pads = compute_pads_2d(node, layer) - - layer['pad_top'] = pads[0] - layer['pad_bottom'] = pads[2] - layer['pad_left'] = pads[1] - layer['pad_right'] = pads[3] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: # Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d( - layer['padding'], - layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width'], - ) - - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] + layer['dilation_height'] = dilations[0] + layer['dilation_width'] = dilations[1] - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index 940b860870..c6aaa6009c 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -1,28 +1,20 @@ -from hls4ml.converters.onnx_to_hls import get_onnx_attribute, get_onnx_input_name, onnx_handler +import numpy as np +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler -@onnx_handler(*['Gemm', 'MatMul']) -def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): + 
+@onnx_handler('MatMul') +def parse_matmul_layer(node, input_names, input_shapes, graph): layer = {} - layer['class_name'] = 'Dense' + layer['class_name'] = 'MatMul' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) - - tran_weight = get_onnx_attribute(node, 'transB', 0) - reader.add_input(layer['name'], node.input, tran_weight) - - weights_shape = reader.get_weights_data(layer['name'], 'kernel').shape - layer['n_in'] = weights_shape[0] - layer['n_out'] = weights_shape[1] - - output_shape = input_shapes[0][:] - output_shape[-1] = layer['n_out'] + layer['inputs'] = input_names + layer['outputs'] = list(node.output) - return layer, output_shape + return layer -# ------------------Global paras for activations # TODO: repair HardSigmoid support # https://github.com/fastmachinelearning/hls4ml/issues/409 activation_layers = [ @@ -37,7 +29,7 @@ def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): 'Softmax', 'Softsign', 'Softplus', - 'Clip', + # 'Clip', ] activation_map = { @@ -53,70 +45,89 @@ def parse_gemm_layer(reader, node, inputs_map, input_shapes, graph, config): 'Softmax': 'Softmax', 'Softsign': 'Activation', 'Softplus': 'Activation', - 'Clip': 'Clip', + # 'Clip': 'Clip', } # --------- @onnx_handler(*activation_layers) -def parse_activation_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_activation_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = activation_map[node.op_type] layer['activation'] = node.op_type.lower() - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) if layer['class_name'] != 'Activation': if layer['class_name'] == 'Softmax': layer['activation'] = 'softmax' + layer['axis'] = get_onnx_attribute(node, 'axis', -1) elif layer['class_name'] in ['ELU', 'LeakyReLU', 'ThresholdedReLU']: layer['activation'] = layer['class_name'] layer['activ_param'] = 
get_onnx_attribute(node, 'alpha', 0.01) - elif layer['class_name'] == 'Clip': - clip_min_node = [x for x in graph.initializer if x.name in node.input] - clip_min = clip_min_node[0].float_data[0] + # # Don't yet support Clip + # elif layer['class_name'] == 'Clip': + # clip_min_node = [x for x in graph.initializer if x.name in input_names] + # clip_min = clip_min_node[0].float_data[0] - # Check if it's relu or not - if clip_min == 0.0: - layer['class_name'] = 'Activation' - layer['activation'] = 'ReLU' - else: - raise Exception('Clip with min != 0 is not supported yet!') + # # Check if it's relu or not + # if clip_min == 0.0: + # layer['class_name'] = 'Activation' + # layer['activation'] = 'ReLU' + # else: + # raise Exception('Clip with min != 0 is not supported yet!') else: layer['activation'] = layer['class_name'] layer['class_name'] = 'Activation' - return layer, [shape for shape in input_shapes[0]] + return layer @onnx_handler('BatchNormalization') -def parse_batchnorm_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_batchnorm_layer(node, input_names, input_shapes, graph): layer = {} - layer['class_name'] = 'BatchNormalization' - layer['data_format'] = 'channels_first' + layer['class_name'] = 'BatchNormOnnx' layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) # Other attributes - layer['epsilon'] = get_onnx_attribute(node, 'epsilon') - layer['momentum'] = get_onnx_attribute(node, 'momentum') + layer['epsilon'] = get_onnx_attribute(node, 'epsilon', 1e-05) + # layer['momentum'] = get_onnx_attribute(node, 'momentum', 0.9) # not used - reader.add_input(layer['name'], node.input) - - in_size = 1 - for dim in input_shapes[0][1:]: - in_size *= dim - - layer['n_in'] = layer['n_out'] = in_size + layer['n_in'] = layer['n_out'] = np.prod(input_shapes[0][1:]) if len(input_shapes[0]) == 2: layer['n_filt'] = -1 elif len(input_shapes[0]) > 2: - 
layer['n_filt'] = input_shapes[0][1] # Always channel first for onnx + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") + layer['data_format'] = 'channels_last' # QONNX needs to be channels-last. + layer['n_filt'] = input_shapes[0][-1] + else: + raise RuntimeError(f"Unexpected input shape: {input_shapes[0]}") + + return layer + + +@onnx_handler('Quant') +def parse_quant_layer(node, input_names, input_shapes, graph): + layer = {} + + layer['class_name'] = 'Quant' + layer['name'] = node.name + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + + # Other attributes + layer['narrow'] = bool(get_onnx_attribute(node, 'narrow')) + layer['rounding_mode'] = get_onnx_attribute(node, 'rounding_mode') + layer['signed'] = bool(get_onnx_attribute(node, 'signed')) - return layer, [shape for shape in input_shapes[0]] + return layer diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 9ccd432d18..2309cc213f 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -1,16 +1,16 @@ -from hls4ml.converters.onnx_to_hls import get_onnx_attribute, get_onnx_input_name, onnx_handler +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler -merge_layers = ['Add', 'Sub', 'Mul', 'Average', 'Max', 'Min', 'Concat', 'Sum'] +merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] @onnx_handler(*merge_layers) -def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_merge_layer(node, input_names, input_shapes, graph): layer = {} layer['class_name'] = node.op_type layer['name'] = node.name layer['op'] = layer['class_name'].lower() - layer['inputs'] = get_onnx_input_name(node, graph) - output_shape = input_shapes[0] + layer['inputs'] = input_names + layer['outputs'] = list(node.output) if layer['class_name'] == 'Concat': rank = 
len(input_shapes[0][1:]) @@ -21,22 +21,10 @@ def parse_merge_layer(reader, node, inputs_map, input_shapes, graph, config): layer['op'] = layer['class_name'].lower() + f'{rank}d' layer['axis'] = get_onnx_attribute(node, 'axis') - # Calculate output shape - new_dim = sum( - [x.type.tensor_type.shape.dim[layer['axis']].dim_value for x in graph.value_info if x.name in node.input] - ) - output_shape[layer['axis']] = new_dim - - elif layer['class_name'] == 'Add': - # Check if the layer is an AddBias - for input in node.input: - if "bias" in input: - layer['class_name'] = 'BiasAdd' - reader.add_input(layer['name'], node.input) else: layer['class_name'] = 'Merge' if len(layer['inputs']) > 2: raise Exception('ERROR: Merging more than two tensors is not yet supported.') - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/pooling.py b/hls4ml/converters/onnx/pooling.py index 67fa76c7c7..1f5c431004 100644 --- a/hls4ml/converters/onnx/pooling.py +++ b/hls4ml/converters/onnx/pooling.py @@ -1,26 +1,30 @@ -from hls4ml.converters.onnx_to_hls import ( - compute_pads_1d, - compute_pads_2d, - get_onnx_attribute, - get_onnx_input_name, - onnx_handler, -) -from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d +import numpy as np + +from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler pool_operations = ['AveragePool', 'MaxPool'] @onnx_handler(*pool_operations) -def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_pool_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + if node.domain != 'qonnx.custom_op.channels_last': + raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last") layer['class_name'] = node.op_type - layer['data_format'] = 'channels_first' # Default ONNX + 
layer['data_format'] = 'channels_last' # Default QONNX info = layer['class_name'].replace('Pool', '') strides = get_onnx_attribute(node, 'strides') kernel_shape = get_onnx_attribute(node, 'kernel_shape') + pads = get_onnx_attribute(node, 'pads') + layer['pads'] = pads + dilations = get_onnx_attribute(node, 'dilations') + if dilations is None: + dilations = [1] * len(kernel_shape) + layer['dilations'] = dilations if len(input_shapes[0]) == 3: # 1D layer['class_name'] = info + 'Pooling1D' @@ -31,70 +35,50 @@ def parse_pool_layer(reader, node, inputs_map, input_shapes, graph, config): layer['pool_width'] = kernel_shape[0] layer['stride_width'] = strides[0] - # Padding - pads = compute_pads_1d(node, layer) - layer['pad_left'] = pads[0] - layer['pad_right'] = pads[1] - - if all(x == 0 for x in pads): # No padding, i.e., 'VALID' padding - layer['padding'] = 'valid' - else: - layer['padding'] = 'same' - - (layer['n_out'], _, _) = compute_padding_1d( - layer['padding'], layer['n_in'], layer['stride_width'], layer['pool_width'] + # formula from ONNX Operators.md documentation + layer['n_out'] = int( + np.floor((layer['n_in'] + np.sum(pads) - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) ) - output_shape = [input_shapes[0][0], layer['n_filt'], layer['n_out']] - elif len(input_shapes[0]) == 4: # 2D layer['class_name'] = info + 'Pooling2D' - layer['n_filt'] = input_shapes[0][1] - layer['in_height'] = input_shapes[0][2] - layer['in_width'] = input_shapes[0][3] + layer['n_filt'] = input_shapes[0][3] + layer['in_height'] = input_shapes[0][1] + layer['in_width'] = input_shapes[0][2] layer['stride_height'] = strides[0] layer['stride_width'] = strides[1] layer['pool_height'] = layer['filt_height'] = kernel_shape[0] layer['pool_width'] = layer['filt_width'] = kernel_shape[1] - pads = compute_pads_2d(node, layer) layer['pad_top'] = pads[0] layer['pad_bottom'] = pads[2] layer['pad_left'] = pads[1] layer['pad_right'] = pads[3] - if all(x == 0 for x in pads): # No 
padding, i.e., 'VALID' padding in Keras/Tensorflow - layer['padding'] = 'valid' - else: # Only 'valid' and 'same' padding are available in Keras - layer['padding'] = 'same' - - (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d( - layer['padding'], - layer['in_height'], - layer['in_width'], - layer['stride_height'], - layer['stride_width'], - layer['filt_height'], - layer['filt_width'], + # formula from ONNX Operators.md documentation + layer['out_height'] = int( + np.floor((layer['in_height'] + pads[0] + pads[2] - ((kernel_shape[0] - 1) * dilations[0] + 1)) / strides[0] + 1) + ) + layer['out_width'] = int( + np.floor((layer['in_width'] + pads[1] + pads[3] - ((kernel_shape[1] - 1) * dilations[1] + 1)) / strides[1] + 1) ) - output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']] - - return layer, output_shape + return layer global_pooling_layers = ['GlobalMaxPool', 'GlobalAveragePool'] @onnx_handler(*global_pooling_layers) -def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_global_pooling_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) layer['class_name'] = node.op_type - layer['data_format'] = 'channels_first' + layer['data_format'] = 'channels_last' # default QONNX # Sonme default parameters for global pooling layer['n_out'] = 1 @@ -116,6 +100,4 @@ def parse_global_pooling_layer(reader, node, inputs_map, input_shapes, graph, co layer['in_height'] = input_shapes[0][2] layer['in_width'] = input_shapes[0][3] - output_shape = [input_shapes[0][0], layer['n_filt']] + [1] * (len(input_shapes[0]) - 2) - - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx/reshape.py b/hls4ml/converters/onnx/reshape.py index 5bbf58b079..9ef20f03d7 100644 --- a/hls4ml/converters/onnx/reshape.py +++ 
b/hls4ml/converters/onnx/reshape.py @@ -1,39 +1,38 @@ -import numpy as np - -from hls4ml.converters.onnx_to_hls import get_onnx_input_name, onnx_handler +from hls4ml.converters.onnx_to_hls import onnx_handler @onnx_handler('Transpose') -def parse_transpose_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_transpose_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = 'Transpose' - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) perm = [list(i.ints) for i in node.attribute][0] # This will get something like [[a,b,c]][0] = [a,b,c] layer['perm'] = [x - 1 for x in perm[1:]] # Ignore the batch dimension in ONNX, and adjust the perm indexing - output_shape = [input_shapes[0][i] for i in perm] - - return layer, output_shape + return layer @onnx_handler('Reshape') -def parse_reshape_layer(reader, node, inputs_map, input_shapes, graph, config): +def parse_reshape_layer(node, input_names, input_shapes, graph): layer = {} layer['name'] = node.name layer['class_name'] = 'Reshape' - layer['inputs'] = get_onnx_input_name(node, graph) + layer['inputs'] = input_names + layer['outputs'] = list(node.output) - target_shape = list([x for x in graph.initializer if x.name == node.input[1]][0].int64_data)[1:] + return layer - if -1 in target_shape: # Need to infer shape for -1 - print("WARNING: Inferring -1 shape ... 
") - dummy_x = np.ones(input_shapes[0][1:]) - dummy_y = np.reshape(dummy_x, target_shape) - target_shape = list(dummy_y.shape) - layer['target_shape'] = target_shape - output_shape = input_shapes[0][:1] + layer['target_shape'] +@onnx_handler('Flatten') +def parse_flatten_layer(node, input_names, input_shapes, graph): + layer = {} + layer['name'] = node.name + layer['class_name'] = 'Reshape' + layer['inputs'] = input_names + layer['outputs'] = list(node.output) + layer['target_shape'] = [-1] # does not contain batch dimension - return layer, output_shape + return layer diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 106daf62da..8f6c7461fb 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -1,78 +1,10 @@ -import numpy as np import onnx -from onnx import helper, numpy_helper, shape_inference +from onnx import helper, numpy_helper from hls4ml.model import ModelGraph -MAXMULT = 4096 - -class ONNXDataReader: - """ - ONNX data reader to be used for extracting relevant information during conversion. - """ - - def __init__(self, model): - self.model = model - self.input_map = {} - self.index_map = { - # Dense - 'kernel': 1, - 'bias': 2, - # BatchNormalization - 'gamma': 1, - 'beta': 2, - 'moving_mean': 3, - 'moving_variance': 4, - } - - def get_weights_data(self, layer_name, var_name): - """Extract weights data from ONNX model. - - Args: - layer_name (str): Layer's name in the ONNX model. - var_name (str): Variable to be extracted. - - Returns: - ndarray: Extracted weights data. 
- """ - # Get the node associated with the layer name - node = next(node for node in self.model.graph.node if node.name == layer_name) - - inputs = self.input_map[layer_name] - inp_idx = self.index_map[var_name] - - if inp_idx >= len(inputs['inputs']): - # Check if the layer is an AddBias layer - if (node.op_type == 'Add') and (var_name == 'bias'): - inp_idx = 1 - else: - # Input not found, likely a bias tensor is not available - return None - - tensor = next((x for x in self.model.graph.initializer if x.name == inputs['inputs'][inp_idx]), None) - - if tensor is not None: - data = numpy_helper.to_array(tensor) - - if inputs['transpose']: - if inputs['perm'] is not None and len(data.shape) == len(inputs['perm']): - data = data.transpose(inputs['perm']) - else: - data = data.transpose() - - # Check for transB in Gemm - if node.op_type == 'Gemm': - if not get_onnx_attribute(node, 'transB'): - data = data.transpose() - - return data - - def add_input(self, layer_name, inputs, transpose=True, perm=None): - self.input_map[layer_name] = {'inputs': inputs, 'transpose': transpose, 'perm': perm} - - -# ----------------------Helpers--------------------- # +# ----------------------Helpers--------------------- def sanitize_layer_name(layer): new_name = layer['name'] if new_name[0].isdigit(): @@ -99,9 +31,52 @@ def get_onnx_attribute(operation, name, default=None): return value -def get_input_shape(model, operation, input_idx=0): - value_info_idx = next((i for i, x in enumerate(model.graph.value_info) if x.name == operation.input[input_idx]), 0) - return [d.dim_value for d in model.graph.value_info[value_info_idx].type.tensor_type.shape.dim] +def get_global_input_shape(graph, inp): + """Return the global input shape of the graph with name inp + + Arguments: + graph: the onnx graph + inp (str): the global input name + + Returns: + list: The shape + + Raises: + StopIteration: If the global input name is not found + """ + inp_shape = next(x.type.tensor_type.shape.dim for x in 
graph.input if x.name == inp) + return list(x.dim_value for x in inp_shape) + + +def get_input_shape(graph, node): + """Return the input shapes of the node in the model + + Arguments: + graph: the onnx graph + node: the onnx node for which the input is desired + + Returns: + list of lists: The shapes of all the inputs + + Raises: + StopIteration: If the an input name is not found in the graph + """ + rv = [] + for inp in node.input: + try: + value_info_idx = next((i for i, x in enumerate(graph.value_info) if x.name == inp)) + dim = list(d.dim_value for d in graph.value_info[value_info_idx].type.tensor_type.shape.dim) + except StopIteration: + # The input is not in the graph, likely it's the input + dim = get_global_input_shape(graph, inp) + if dim: + rv.append(dim) + return rv + + +def get_constant_value(graph, constant_name): + tensor = next((x for x in graph.initializer if x.name == constant_name), None) + return numpy_helper.to_array(tensor) def compute_pads_1d(operation, layer): @@ -155,7 +130,7 @@ def compute_pads_2d(operation, layer): return pads -# ----------------------Layer handling--------------------- # +# ----------------------Layer handling--------------------- layer_handlers = {} @@ -178,27 +153,6 @@ def decorator(function): return decorator -# --->> A set of functions to address the naming convetion in ONNx's graph -def get_onnx_input_name(node, graph): - """ - In ONNX, when calling node.input, it returns the node input's index in the graph instead of the input's name. - However, the input's name is used for indexing in ModelGraph's graph. This function return the input node's name instead. 
- """ - - in_node = [in_node for in_node in graph.node if (in_node.output[0] in node.input)] - - if in_node: - if in_node[0].op_type != 'Flatten': - input_node_name = [x.name for x in in_node] - else: # IF it's a flatten - input_node_name = [x.name for x in graph.node if (x.output[0] in in_node[0].input)] - - return input_node_name - - else: # If there is no input name it's actually the first layer - return [replace_char_inconsitency(node.input[0])] - - def get_out_layer_name(graph): """ Get the output layer's name for the model. @@ -226,18 +180,16 @@ def onnx_to_hls(config): # Extract model architecture print('Interpreting Model ...') - model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] + onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] - model = shape_inference.infer_shapes(model) - graph = model.graph - - reader = ONNXDataReader(model) + # We don't infer the shapes because the qonnx package preprocessing does it. 
# Obtain list of input/ouput layers - all_inputs = [x.name for x in model.graph.input] - all_initializers = [x.name for x in model.graph.initializer] + all_inputs = [x.name for x in onnx_model.graph.input] + all_initializers = [x.name for x in onnx_model.graph.initializer] input_layers = [x for x in all_inputs if x not in all_initializers] - output_layers = get_out_layer_name(graph) + constant_layers = all_initializers # no need to copy it even though we change it + output_layers = get_out_layer_name(onnx_model.graph) print("Output layers: ", output_layers) @@ -245,69 +197,64 @@ def onnx_to_hls(config): input_layer = {} input_layer['name'] = replace_char_inconsitency(inp) input_layer['class_name'] = 'InputLayer' - inp_shape = next((x.type.tensor_type.shape.dim for x in model.graph.input if x.name == inp), None) - input_layer['input_shape'] = [x.dim_value for x in inp_shape] - - if len(input_layer['input_shape']) > 1: - input_layer['input_shape'][0] = None # Firt dim is batch + inp_shape = get_global_input_shape(onnx_model.graph, inp) + # We only support ONNX where the first dimension is the batch dimension. 
+ # Remove the batch dimension in all subsequnt use + input_layer['input_shape'] = inp_shape[1:] + print('Input shape:', input_layer['input_shape']) # Clean the layer name for specific models sanitize_layer_name(input_layer) input_layers[i] = input_layer['name'] layer_list.append(input_layer) + for i, constant in enumerate(constant_layers): + constant_layer = {} + constant_layer['name'] = replace_char_inconsitency(constant) + constant_layer['class_name'] = 'Constant' + constant_layer['value'] = get_constant_value(onnx_model.graph, constant) + + # Clean the layer name for specific models + sanitize_layer_name(constant_layer) + constant_layers[i] = constant_layer['name'] + + layer_list.append(constant_layer) + # Defined supported layers and check for unsupported layer type - skip_layers = ['Dropout', 'Identity', 'Flatten'] + skip_layers = ['Dropout', 'Identity'] # Map inputs of skipped layers inputs_map = {} supported_layers = get_supported_onnx_layers() + skip_layers - # Get input shape - current_shape = [input_layer['input_shape']] - print('Input shape:', current_shape[0]) - - # Loop through layers - layer_counter = 0 - - # Output shape tracking - output_shape = None - print('Topology:') - for node in graph.node: + for node in onnx_model.graph.node: if node.op_type not in supported_layers: raise Exception(f'ERROR: Unsupported operation type: {node.op_type}') - # If not the first layer then input shape is taken from last layer's output - if layer_counter != 0: - current_shape = [output_shape] + # Note that at this point, input shape still contains batch dimension + # in cases where it appears. That is not filtered out till later. 
+ input_shapes = get_input_shape(onnx_model.graph, node) if node.op_type in skip_layers: - if node.op_type == 'Flatten': - output_shape = [current_shape[0][0], np.prod(current_shape[0][1:])] - - else: - # Currently supported skipped layers have only one input and output - # Skipped layers can follow each other (e.g., Dropout -> Flatten) - - # Mapping inputs - input_name = inputs_map.get(node.input[0], node.input[0]) - output_name = node.output[0] - inputs_map[output_name] = input_name + # Currently supported skipped layers have only one input and output + # Skipped layers can follow each other - output_shape = current_shape[0] + # Mapping inputs + input_name = inputs_map.get(node.input[0], node.input[0]) + output_name = node.output[0] + inputs_map[output_name] = input_name continue - if node.op_type in supported_layers: - layer_counter = layer_counter + 1 + input_names = [inputs_map.get(x, x) for x in node.input] # Process the layer - layer, output_shape = layer_handlers[node.op_type](reader, node, inputs_map, current_shape, graph, config) + layer = layer_handlers[node.op_type](node, input_names, input_shapes, onnx_model.graph) sanitize_layer_name(layer) - print('Layer name: {}, layer type: {}, current shape: {}'.format(layer['name'], layer['class_name'], current_shape)) + print(f"Layer name: {layer['name']}, layer type: {layer['class_name']}, current shape: {input_shapes}") layer_list.append(layer) ################# @@ -315,5 +262,5 @@ def onnx_to_hls(config): ################# print('Creating HLS model') - hls_model = ModelGraph(config, reader, layer_list, input_layers, output_layers) + hls_model = ModelGraph(config, layer_list, input_layers, output_layers) return hls_model From af47a0d4563d986db0b7412536983d77ed9cedca Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 12 Jul 2023 13:50:51 -0500 Subject: [PATCH 04/62] change tuples to lists --- hls4ml/model/layers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 6a23a9b934..320a1fde57 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -352,7 +352,7 @@ class Constant(Layer): def initialize(self): value = self.attributes['value'] self.value = value # note, this is unquantized; Only here for easier access - shape = value.shape + shape = list(value.shape) if not shape: shape = (1,) self.value = np.array([self.value]) @@ -963,10 +963,10 @@ def initialize(self): if len(inp2.shape) == 1: # mat vec multiply assert inp1.shape[-1] == inp2.shape[0] - shape = tuple(inp1.shape[:-1]) + (inp2.shape[0],) + shape = list(inp1.shape[:-1]) + [inp2.shape[0]] else: assert inp1.shape[-1] == inp2.shape[-2] - shape = tuple(inp1.shape[:-1]) + (inp2.shape[-1],) + shape = list(inp1.shape[:-1]) + [inp2.shape[-1]] if len(shape) > 1: dims = [f'N_LAYER_{i}_{self.index}' for i in range(1, len(shape) + 1)] else: From 8f8cc0b21e23f52c5d750cbbc2ea56104008c6d7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 12 Jul 2023 18:26:28 -0500 Subject: [PATCH 05/62] snapshot of adding qonnx optimizers --- hls4ml/backends/fpga/fpga_backend.py | 8 +- hls4ml/model/layers.py | 4 +- hls4ml/model/optimizer/__init__.py | 8 + .../model/optimizer/passes/batchnorm_opt.py | 169 ++++++++ .../model/optimizer/passes/conv_to_convxd.py | 90 ++++ .../optimizer/passes/matmul_const_to_dense.py | 58 +++ hls4ml/model/optimizer/passes/merge_const.py | 192 +++++++++ hls4ml/model/optimizer/passes/move_scales.py | 301 ++++++++++++++ .../passes/propagate_conv_precision.py | 77 ++++ .../passes/propagate_dense_precision.py | 70 ++++ hls4ml/model/optimizer/passes/qkeras.py | 35 +- hls4ml/model/optimizer/passes/quant_opt.py | 387 ++++++++++++++++++ .../model/optimizer/passes/reshape_const.py | 27 ++ 13 files changed, 1389 insertions(+), 37 deletions(-) create mode 100644 hls4ml/model/optimizer/passes/batchnorm_opt.py create mode 100644 hls4ml/model/optimizer/passes/conv_to_convxd.py create mode 100644 
hls4ml/model/optimizer/passes/matmul_const_to_dense.py create mode 100644 hls4ml/model/optimizer/passes/merge_const.py create mode 100644 hls4ml/model/optimizer/passes/move_scales.py create mode 100644 hls4ml/model/optimizer/passes/propagate_conv_precision.py create mode 100644 hls4ml/model/optimizer/passes/propagate_dense_precision.py create mode 100644 hls4ml/model/optimizer/passes/quant_opt.py create mode 100644 hls4ml/model/optimizer/passes/reshape_const.py diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 8cfaec8b3f..97e458f7fd 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -13,6 +13,8 @@ LSTM, Activation, BatchNormalization, + BatchNormOnnx, + Conv, Conv1D, Conv2D, Dense, @@ -22,8 +24,10 @@ GarNetStack, GlobalPooling1D, GlobalPooling2D, + MatMul, Pooling1D, Pooling2D, + Quant, SeparableConv1D, SeparableConv2D, SimpleRNN, @@ -63,6 +67,8 @@ def __init__(self, name): LSTM, GRU, Dot, + Conv, + MatMul, ] for layer in accum_layers: @@ -70,7 +76,7 @@ def __init__(self, name): attrs.append(TypeAttribute('accum')) self.attribute_map[layer] = attrs - rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack] + rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack, Quant, BatchNormOnnx] for layer in rf_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 320a1fde57..bd465ff7b9 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -391,8 +391,8 @@ def initialize(self): # need to get it from the input shape_node = self.get_input_node(self.inputs[1]) # for QONNX, remove batch dimension - # (onnx cleaning should have removed reshape dimension) - if shape_node: + # (onnx cleaning should have removed reshapes not on data path) + if isinstance(shape_node, Constant): target_shape = shape_node.value[1:] else: raise RuntimeError("Reshape for 
ONNX requires the target shape to be a second input.") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 2e9b197475..db65370e40 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -37,6 +37,14 @@ 'fuse_bias_add', 'remove_useless_transpose', 'expand_layer_group', + 'reshape_constant', + 'quant_constant_parameters', + 'quant_to_activation', + 'fuse_quant_with_constant', + 'quant_to_alpha_activation_alpha', + 'const_quant_to_const_alpha', + 'matmul_const_to_dense', + 'conv_to_conv_x_d', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py new file mode 100644 index 0000000000..a7b0c27209 --- /dev/null +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -0,0 +1,169 @@ +import numpy as np + +from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.optimizer import OptimizerPass + +_base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') + + +class BatchNormOnnxConstantParameters(OptimizerPass): + """Remove Constant from the BatchNormalization node parameters (but not input[0])""" + + def match(self, node): + is_match = isinstance(node, BatchNormOnnx) and any(node.inputs[1:]) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from the BatchNormalization node parameters (but not input[0]) + """ + + if not (len(node.inputs) == 5 and all(node.inputs)): + raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + gamma_node = node.get_input_node(node.inputs[1]) + if not isinstance(gamma_node, Constant): + raise TypeError("Only consant gammas supported") + gamma = gamma_node.value + attributes['gamma_data'] = gamma + node.inputs[1] = '' + model.remove_node(gamma_node, 
rewire=False) + + beta_node = node.get_input_node(node.inputs[2]) + if not isinstance(beta_node, Constant): + raise TypeError("Only consant betas supported") + beta = beta_node.value + attributes['beta_data'] = beta + node.inputs[2] = '' + model.remove_node(beta_node, rewire=False) + + moving_mean_node = node.get_input_node(node.inputs[3]) + if not isinstance(moving_mean_node, Constant): + raise TypeError("Only consant moving_means supported") + moving_mean = moving_mean_node.value + attributes['mean_data'] = moving_mean + node.inputs[3] = '' + model.remove_node(moving_mean_node, rewire=False) + + moving_variance_node = node.get_input_node(node.inputs[4]) + if not isinstance(moving_variance_node, Constant): + raise TypeError("Only consant moving_variances supported") + moving_variance = moving_variance_node.value + attributes['variance_data'] = moving_variance + node.inputs[4] = '' + model.remove_node(moving_variance_node, rewire=False) + + # scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) + # bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) + # attributes["scale_data"] = scale + # attributes["bias_data"] = bias + + new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) + + model.replace_node(node, new_node) + + return True + + +class ConstantBatchNormFusion(OptimizerPass): + """ + Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) + """ + + def match(self, node): + is_match = ( + isinstance(node, BatchNormalization) + and not any(node.inputs[1:]) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") + ) + return is_match + + def transform(self, model, node): + """ + Remove the batch norm + """ + const_node = node.get_input_node(node.inputs[0]) + + new_val = const_node.value * node.weights["scale"].data_unquantized + 
node.weights["bias"].data_unquantized + const_node.set_attr("value", new_val) + const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined + const_node.set_attr("quant_precision", node.get_attr("quant_precision")) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + # remove the batch norm node + model.remove_node(node, rewire=True) + + return True + + +class FuseConsecutiveBatchNormalization(OptimizerPass): + ''' + OptimizerPass to merge consecutive BatchNormalization layers, + only if the earlier one does not have quantization specified + ''' + + def match(self, node): + prev_node = node.get_input_node(node.inputs[0]) + basic_match = ( + isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) + and not prev_node.get_attr("quant_precision") + ) + + # check for compatibility to merge + if basic_match: + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr("scale_quantizer") is None and node.get_attr("scale_quantizer") is None) + or (s0 == np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all() + ) + bias_compatible = ( + (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all() + ) + return scale_compatible and bias_compatible + else: + return False + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]) + + s0 = prev_node.weights['scale'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + + s_quantizer = ( + node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("scale_quantizer") + ) + b_quantizer = ( + 
node.get_attr("bias_quantizer") if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + ) + + node.set_attr("scale_quantizer", s_quantizer) + node.set_attr("bias_quantizer", b_quantizer) + if s_quantizer: + node.set_attr("scale_precision", s_quantizer.hls_type) + if b_quantizer: + node.set_attr("bias_precision", b_quantizer.hls_type) + + scale_new = s0 * s1 + bias_new = s1 * b0 + b1 + + # call function so that quantizer would be called if needed + node.add_weights(scale_new, quantizer=s_quantizer) + node.add_bias(bias_new, quantizer=b_quantizer) + + model.remove_node(prev_node, rewire=True) + return True diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py new file mode 100644 index 0000000000..28f4d4c0bd --- /dev/null +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -0,0 +1,90 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import IntegerPrecisionType + +# these are attributes to copy +_base_attributes = ( + 'Trace', + 'reuse_factor', + 'in_width', + 'out_width', + 'n_chan', + 'n_filt', + 'pad_left', + 'pad_right', + 'filt_width', + 'stride_width', + 'dilation_width', + 'in_height', + 'out_height', + 'pad_top', + 'pad_bottom', + 'filt_height', + 'stride_height', + 'dilation_height', + 'strategy', + 'data_format', +) + + +class ConvToConvXD(OptimizerPass): + """Convert Conv with constant to a Conv1D or Conv2D layer""" + + def match(self, node): + is_match = isinstance(node, Conv) and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) + ) + + return is_match + + def transform(self, model, node): + """Convert Conv with constant to a Conv1D or Conv2D layer""" + + 
weight_node = node.get_input_node(node.inputs[1]) + weight_precision = weight_node.get_attr("quant_precision") + bias_node = None + bias_precision = None + if len(node.inputs) == 3: + bias_node = node.get_input_node(node.inputs[2]) + bias_precision = bias_node.get_attr("quant_precision") + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) + if node.attributes['n_dim'] == 1: + newtype = Conv1D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) + else: + newtype = Conv2D + attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) + attributes["weight_precision"] = weight_precision + attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + + if bias_node: + attributes["bias_data"] = bias_node.value + attributes["bias_precision"] = bias_precision + attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + else: + attributes["bias_data"] = np.zeros(attributes['n_filt']) + attributes["bias_precision"] = IntegerPrecisionType(1, False) + + # making new node + new_node = model.make_node( + newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + ) + + # removing and replacing old nodes + model.remove_node(weight_node, rewire=False) + if bias_node: + model.remove_node(bias_node, rewire=False) + model.replace_node(node, new_node) + + return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py new file mode 100644 index 0000000000..82c7b56313 --- /dev/null +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -0,0 +1,58 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Dense, MatMul +from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import IntegerPrecisionType + +_base_attributes = ('Trace', 'reuse_factor', 
'weight', 'weight_t', 'bias', 'bias_t') + + +class MatmulConstToDense(OptimizerPass): + """ + Convert MatMul with constant to a dense layer. Note, this only supports the second input + being the constant. If needed, one could add transposes to make that be the case in + other yet to be written optimizers. + """ + + def match(self, node): + is_match = ( + isinstance(node, MatMul) and len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + return is_match + + def transform(self, model, node): + """Substitute Matmul + Constant for a single dense""" + # determining Constant layer input + const_node = node.get_input_node(node.inputs[1]) + other_var = node.get_input_variable(node.inputs[0]) + + weight_precision = const_node.get_attr("quant_precision") + weight_quantizer = const_node.get_attr("quantizer") + + in_shape = other_var.shape + n_in = np.prod(in_shape) + out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] + n_out = np.prod(out_shape) + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update( + { + "weight_data": const_node.value, + "weight_precision": weight_precision, + "weight_quantizer": weight_quantizer, + "bias_data": np.zeros(out_shape), + "bias_precision": IntegerPrecisionType(1, False), + "n_in": n_in, + "n_out": n_out, + } + ) + + # making new node + new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + + # removing and replacing old nodes + model.remove_node(const_node, rewire=False) + model.replace_node(node, new_dense) + + return True diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py new file mode 100644 index 0000000000..4e339ccc3f --- /dev/null +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -0,0 +1,192 @@ +import numpy as np + +from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer +from hls4ml.model.layers 
class MergeTwoConstants(OptimizerPass):
    """Fold a Merge whose two inputs are both Constants into a single Constant."""

    def match(self, node):
        """Accept Merge nodes where both the first and the second input come from Constants."""
        if not isinstance(node, Merge):
            return False
        if not isinstance(node.get_input_node(node.inputs[0]), Constant):
            return False
        return isinstance(node.get_input_node(node.inputs[1]), Constant)

    def transform(self, model, node):
        """
        Evaluate the merge on the two constant values and store the result in the first Constant.

        The second Constant and the Merge node itself are removed from the model.
        Raises RuntimeError if the Merge carries an operation that is not handled here.
        """
        kept_const = node.get_input_node(node.inputs[0])
        dropped_const = node.get_input_node(node.inputs[1])

        lhs = kept_const.value
        rhs = dropped_const.value

        # one evaluator per supported merge operation
        evaluators = {
            'add': lambda a, b: a + b,
            'sum': lambda a, b: a + b,
            'sub': lambda a, b: a - b,
            'mul': lambda a, b: a * b,
            'div': lambda a, b: a / b,
            'average': lambda a, b: np.mean(np.array([a, b]), axis=0),
            'max': np.maximum,
            'min': np.minimum,
        }

        op = node.attributes["op"]
        evaluate = evaluators.get(op)
        if evaluate is None:
            raise RuntimeError(f"Unexpected op_type: {op}")
        new_val = evaluate(lhs, rhs)

        # a quantizer / precision attached to the Merge (None if not defined) overrides the constant's own
        merge_quantizer = node.get_attr("quantizer")
        if merge_quantizer:
            kept_const.set_attr("quantizer", merge_quantizer)
        kept_const.set_attr("value", new_val)

        merge_precision = node.get_attr("quant_precision")
        if merge_precision:
            kept_const.set_attr("quant_precision", merge_precision)

        # reinitialize (which also runs quantization if quantizer exists)
        kept_const.initialize()

        model.remove_node(dropped_const, rewire=False)

        # the Merge is no longer needed: take it out and rewire around it
        model.remove_node(node, rewire=True)

        return True
node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and ( + isinstance(node.get_input_node(node.inputs[0]), Constant) + != isinstance(node.get_input_node(node.inputs[1]), Constant) + ) + ) + # note: != for booleans is xor. + return is_match + + def transform(self, model, node): + node1 = node.get_input_node(node.inputs[1]) + + node1const = isinstance(node1, Constant) + if node1const: + const_node = node1 + input_node_idx = 0 + else: + const_node = node.get_input_node(node.inputs[0]) + input_node_idx = 1 + + input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape + n_in = np.prod(input_shape) + + scale_precision = None + scale_quantizer = None + bias_precision = None + bias_quantizer = None + + op = node.attributes["op"] + if op in ('add', 'sum'): + scale = np.array(1) + bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") + elif op == 'sub': + if node1const: + scale = np.array(1) + bias = -const_node.value + else: + scale = np.array(-1) + bias = const_node.value + bias_precision = const_node.get_attr("quant_precision") + bias_quantizer = const_node.get_attr("quantizer") + if bias_precision and not bias_precision.signed: + # need to add a bit + bias_precision.signed = 1 + bias_precision.width += 1 + bias_precision.integer += 1 + bias_quantizer = QuantNodeQuantizer(bias_precision) + + elif op == 'mul': + scale = const_node.value + bias = np.array(0) + scale_precision = const_node.get_attr("quant_precision") + scale_quantizer = const_node.get_attr("quantizer") + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update( + { + "scale_data": scale, + "bias_data": bias, + "n_in": n_in, + "n_out": n_in, + "n_filt": -1, + "scale_precision": scale_precision, + "scale_quantizer": scale_quantizer, + "bias_precision": bias_precision, + "bias_quantizer": bias_quantizer, + } + ) + + bn_layer = model.make_node( + 
class MergeToBatchNormalizationDiv(OptimizerPass):
    """
    Turn a Div Merge whose divisor is a Constant into a BatchNormalization.

    TODO: propagate precision
    """

    def match(self, node):
        """Accept 'div' Merge nodes whose second input is a Constant (only the divisor may be constant)."""
        if not isinstance(node, Merge):
            return False
        if node.attributes["op"] != 'div':
            return False
        return isinstance(node.get_input_node(node.inputs[1]), Constant)

    def transform(self, model, node):
        """Replace the Merge by a BatchNormalization that scales by the reciprocal of the constant."""
        n_in = np.prod(node.get_input_variable().shape)
        divisor_node = node.get_input_node(node.inputs[1])

        bn_attrs = {key: node.attributes.get(key, None) for key in _base_attributes}
        # dividing by c is the same as scaling by 1/c with no offset
        bn_attrs["scale_data"] = 1 / divisor_node.value
        bn_attrs["bias_data"] = np.array(0)
        bn_attrs["n_in"] = n_in
        bn_attrs["n_out"] = n_in
        bn_attrs["n_filt"] = -1

        bn_layer = model.make_node(
            "BatchNormalization", f"bn_{node.name}", bn_attrs, [node.inputs[0]], list(node.outputs)
        )

        # the divisor now lives in the scale, so the Constant is removed without rewiring
        model.remove_node(divisor_node, rewire=False)
        model.replace_node(node, bn_layer)

        return True
+ Note, if both are this optimition runs twice. + ''' + is_match = ( + isinstance(node, MatMul) + and len(node.inputs) == 2 + and ( + isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) + ) + return is_match + + def transform(self, model, node): + # determine input with ApplyAlpha. If both, first propagate apply alpha associated with a constant + is_aa = [False, False] + from_const = [False, False] + inp = [node.get_input_node(node.inputs[0]), node.get_input_node(node.inputs[1])] + for i in range(2): + if isinstance(inp[i], ApplyAlpha): + is_aa[i] = True + from_const[i] = isinstance(inp[i].get_input_node(inp[i].inputs[0]), Constant) + + # prefer alpha from constant + if from_const[0]: + alpha_idx = 0 + elif from_const[1]: + alpha_idx = 1 + elif is_aa[0]: + alpha_idx = 0 + else: + alpha_idx = 1 # is_aa[1] must be true + + apply_alpha = inp[alpha_idx] + other_idx = 0 if alpha_idx else 1 + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + # if did not succeed in propagating, try again + if not can_propagate and isinstance(inp[other_idx], Constant): + # can handle nonzero bias in some cases if other value is a Constant + try: + np.broadcast_to(scale, output.shape) # check size compatibility + newscale = scale + newbias = 
class ScaleDownAdd(OptimizerPass):
    '''Shift an identical ApplyAlpha below a Merge (Add)'''

    def match(self, node):
        '''
        Check to see if we have an add with two ApplyAlphas with identical scale.

        Returns a plain bool. Scales whose shapes cannot be compared elementwise are
        treated as "not identical" instead of raising.
        '''
        if not (isinstance(node, Merge) and len(node.inputs) == 2 and node.attributes["op"] == "add"):
            return False

        in0 = node.get_input_node(node.inputs[0])
        in1 = node.get_input_node(node.inputs[1])
        if not (isinstance(in0, ApplyAlpha) and isinstance(in1, ApplyAlpha)):
            return False

        scale0 = in0.weights['scale'].data_unquantized
        scale1 = in1.weights['scale'].data_unquantized
        try:
            # np.all also copes with the scalar False that older NumPy versions return
            # for non-broadcastable operands; newer versions raise ValueError instead
            return bool(np.all(scale0 == scale1))
        except ValueError:
            return False

    def transform(self, model, node):
        '''
        Replace the two input ApplyAlphas by a single one placed after the add.

        Since both inputs share the same scale, the new ApplyAlpha uses that scale and
        the sum of the two biases. Returns False, leaving the model untouched, when
        the two biases cannot be added together.
        '''
        in0 = node.get_input_node(node.inputs[0])
        in1 = node.get_input_node(node.inputs[1])

        # Check if we can move
        scale = in0.weights['scale'].data_unquantized
        bias0 = in0.weights['bias'].data_unquantized
        bias1 = in1.weights['bias'].data_unquantized
        try:
            bias = bias0 + bias1
        except ValueError:
            # incompatible bias shapes: nothing has been modified yet, so just give up
            return False

        model.remove_node(in0)
        model.remove_node(in1)

        # the combined ApplyAlpha reuses the name and attributes of the first one
        new_node = model.make_node('ApplyAlpha', in0.name, in0.attributes, list(node.outputs))
        new_node.add_weights(scale)
        new_node.add_bias(bias)
        model.insert_node(new_node)
        return True
node.get_input_node(node.inputs[0]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + np.broadcast_to(scale, output.shape) # check broadcastable + newscale = scale + newbias = np.array(0) + can_propagate = True + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + model.remove_node(apply_alpha) + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) + new_node.add_weights(newscale) + new_node.add_bias(newbias) + model.insert_node(new_node) + return True + + +class ScaleDownWeightConv(OptimizerPass): + '''Shift an ApplyAlpha weight (from conv side) below a Conv''' + + def match(self, node): + '''Shift an ApplyAlpha from the Weight''' + is_match = ( + isinstance(node, Conv) and len(node.inputs) > 1 and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + ) + + return is_match + + def transform(self, model, node): + apply_alpha = node.get_input_node(node.inputs[1]) + + # Check if we can move + scale = apply_alpha.weights['scale'].data_unquantized + bias = apply_alpha.weights['bias'].data_unquantized + + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + output = node.get_output_variable() + + can_propagate = False + if not bias.shape and bias == 0: + # zero bias, propagate through, if possible + # (always 
class ScaleDownBiasConv(OptimizerPass):
    '''Shift an ApplyAlpha bias (from conv side) below a Conv'''

    @staticmethod
    def _collapse_if_uniform(values):
        '''Return a 0-d array when every entry of values equals the first one, else values unchanged.'''
        flat = np.ravel(values)
        if (flat[0] == values).all():
            return np.array(flat[0])
        return values

    def match(self, node):
        '''Accept Conv nodes that have a third input and where that input comes from an ApplyAlpha.'''
        if not isinstance(node, Conv):
            return False
        if not len(node.inputs) > 2:
            return False
        return isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha)

    def transform(self, model, node):
        '''
        Move the ApplyAlpha feeding the third Conv input to the Conv output.

        This is only done when the ApplyAlpha has a scalar scale equal to 1 and its bias
        broadcasts to the Conv output shape; otherwise False is returned and the model
        is left unchanged.
        '''
        apply_alpha = node.get_input_node(node.inputs[2])

        # uniform scale / bias arrays are treated as scalars
        scale = self._collapse_if_uniform(apply_alpha.weights['scale'].data_unquantized)
        bias = self._collapse_if_uniform(apply_alpha.weights['bias'].data_unquantized)

        output = node.get_output_variable()

        # anything but a unit scalar scale cannot be moved by this pass
        if scale.shape or not scale == 1:
            return False

        try:
            np.broadcast_to(bias, output.shape)  # only a compatibility check, result unused
        except ValueError:
            return False

        model.remove_node(apply_alpha)

        # No scale, just additional bias
        moved = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, list(node.outputs))
        moved.add_weights(np.array(1))
        moved.add_bias(bias)
        model.insert_node(moved)
        return True
class PropagateConvPrecision(OptimizerPass):
    """Derive the accumulator (and possibly output) precision of Conv1D / Conv2D nodes.

    Only done when both the input and the weights carry a precision set by a quant node,
    since otherwise the values get huge.
    """

    def match(self, node):
        """Accept every Conv1D and Conv2D node."""
        return isinstance(node, (Conv1D, Conv2D))

    def transform(self, model, node):
        """
        Set 'accum_t' on the node and, unless the node has its own quant precision, its output precision.

        Always returns False, also when the node attributes were updated.
        """
        input_precision = node.get_input_node().get_attr("quant_precision")
        weight_precision = node.get_attr("weight_precision")
        if not (input_precision and weight_precision):
            return False

        # NOTE(review): the last weight axis is used as the accumulation depth -- confirm that
        # this axis is the intended one for the weight layout in use
        accum_precision = _propagate_type_conv(
            input_precision,
            weight_precision,
            node.get_attr("bias_precision"),
            num_feature_maps=node.weights['weight'].data_unquantized.shape[-1],
            filt_width=node.get_attr('filt_width'),
            filt_height=node.get_attr('filt_height', 1),  # 1D convolutions have no height
        )

        node.set_attr('accum_t', NamedType(f'layer{node.index}_accum_t', accum_precision))

        # an output precision explicitly set by a quant node takes priority
        if not node.get_attr("quant_precision"):
            node.update_output_precision(accum_precision)

        return False
class PropagateDensePrecision(OptimizerPass):
    """
    Derive the accumulator (and possibly output) precision of Dense nodes.

    Only done when both the input and the weights carry a precision set by a quant node,
    since otherwise the values get huge.
    """

    def match(self, node):
        """Accept every Dense node."""
        return isinstance(node, Dense)

    def transform(self, model, node):
        """
        Set 'accum_t' on the node and, unless the node has its own quant precision, its output precision.

        Always returns False, also when the node attributes were updated.
        """
        input_precision = node.get_input_node().get_attr("quant_precision")
        weight_precision = node.get_attr("weight_precision")
        if not (input_precision and weight_precision):
            return False

        # the accumulation runs over the last axis of the input
        accum_precision = _propagate_type_dense(
            input_precision,
            weight_precision,
            node.get_attr("bias_precision"),
            node.get_input_variable().shape[-1],
        )

        node.set_attr('accum_t', NamedType(f'layer{node.index}_accum_t', accum_precision))

        # an output precision explicitly set by a quant node takes priority
        if not node.get_attr("quant_precision"):
            node.update_output_precision(accum_precision)

        return False


def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc):
    '''
    Compute the fixed-point type of a sum of num_acc input*weight products plus an optional bias.

    The returned FixedPrecisionType is created without rounding or saturation mode.
    A falsy bias_precision means no bias correction is applied.
    '''

    # a sum of num_acc terms needs ceil(log2(num_acc)) extra bits
    growth = math.ceil(np.log2(num_acc))
    bitwidth = weight_precision.width + input_precision.width + growth
    integer = weight_precision.integer + input_precision.integer + growth
    signed = weight_precision.signed or input_precision.signed

    frac = bitwidth - integer

    # correct for bias
    if bias_precision:
        # an extra integer bit is needed on whichever side is unsigned while the other is signed
        extra_for_acc = bias_precision.signed and not signed
        extra_for_bias = signed and not bias_precision.signed
        integer = max(integer + extra_for_acc, bias_precision.integer + extra_for_bias) + 1
        bias_frac = bias_precision.width - bias_precision.integer
        bitwidth = integer + max(frac, bias_frac)
        signed = signed or bias_precision.signed

    # Because this only calculates a precision, no rounding or saturation mode is set
    return FixedPrecisionType(bitwidth, integer, signed, None, None)
a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -1,7 +1,7 @@ import numpy as np import tensorflow as tf -from hls4ml.model.layers import ApplyAlpha, BatchNormalization +from hls4ml.model.layers import ApplyAlpha from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer @@ -81,7 +81,6 @@ def register_qkeras(): register_pass('output_rounding_saturation_mode', OutputRoundingSaturationMode) register_pass('qkeras_factorize_alpha', QKerasFactorizeAlpha) register_pass('extract_ternary_threshold', ExtractTernaryThreshold) - register_pass('fuse_consecutive_batch_normalization', FuseConsecutiveBatchNormalization) class QKerasFactorizeAlpha(OptimizerPass): @@ -181,38 +180,6 @@ def transform(self, model, node): return True -class FuseConsecutiveBatchNormalization(OptimizerPass): - '''OptimizerPass to merge consecutive BatchNormalization layers. - These may exist in a model after QKerasFactorizeAlpha layer. - Scale and Bias of each layer are combined into scale and bias of a single layer. - ''' - - def match(self, node): - return isinstance(node, BatchNormalization) and isinstance(node.get_input_node(), BatchNormalization) - - def transform(self, model, node): - bn0 = node.get_input_node() - bn1 = node - bn0_map = bn0.get_output_use_map() - bn1_map = bn1.get_output_use_map() - if len(bn0_map[bn0.name]) > 1 or len(bn1_map[bn1.name]) > 1: - return False - - s0 = bn0.weights['scale'].data - b0 = bn0.weights['bias'].data - s1 = bn1.weights['scale'].data - b1 = bn1.weights['bias'].data - - s2 = s0 * s1 - b2 = s1 * b0 + b1 - - bn0.weights['scale'].data = s2 - bn0.weights['bias'].data = b2 - - model.remove_node(node, rewire=True) - return True - - class ExtractTernaryThreshold(OptimizerPass): '''The input value (threshold) at which the output of a a ternary activation changes is configurable. 
This pass extracts that threshold point, inserting diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py new file mode 100644 index 0000000000..f0a5129d52 --- /dev/null +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -0,0 +1,387 @@ +''' +This file includes optimizations related to quant nodes. + +As a first step, QuantConstantParameters converts the extra inputs to attributes. It is always the first step. + +The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or +nonzero offset. In the first case no scaling is required, so a Quant node effectively becomes a linear activation. +For the common case when this is applied on a constant weight, the activation is immediately merged with the weight, +quantizing the weights. In case 2, we need to explicitly scale and unscale, so the Quant node becomes 3 nodes, an +ApplyAlpha node to apply a scale/shift, a Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. +We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is applied to a +Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some +of the other optimizations.
class QuantConstantParameters(OptimizerPass):
    """Turn the Constant parameter inputs of a Quant node (inputs 1 to 3, not input 0) into attributes."""

    # position of each parameter input and the attribute that receives its value
    _PARAMETER_INPUTS = ((1, 'scale'), (2, 'zeropt'), (3, 'bitwidth'))

    @staticmethod
    def _constant_at(node, idx):
        """Return a truthy value only if input idx of node exists and is produced by a Constant."""
        candidate = node.get_input_node(node.inputs[idx])
        return candidate and isinstance(candidate, Constant)

    def match(self, node):
        """Accept Quant nodes that still have at least one Constant among inputs 1, 2 and 3."""
        return isinstance(node, Quant) and (
            self._constant_at(node, 1) or self._constant_at(node, 2) or self._constant_at(node, 3)
        )

    def transform(self, model, node):
        """
        Store the value of every Constant parameter input as an attribute of the Quant node.

        The corresponding input name is cleared and the Constant is removed from the model.
        Raises RuntimeError when the bitwidth constant is not a scalar.
        """
        for idx, attr_name in self._PARAMETER_INPUTS:
            param_node = node.get_input_node(node.inputs[idx])
            if not param_node or not isinstance(param_node, Constant):
                continue

            # scale and zero point may be arrays, the bitwidth may not
            if attr_name == 'bitwidth' and np.squeeze(param_node.value).shape:
                raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node")

            node.set_attr(attr_name, param_node.value)
            node.inputs[idx] = ''
            model.remove_node(param_node, rewire=False)

        return True
class FuseQuantWithConstant(OptimizerPass):
    '''
    Apply the quantization of a Quant node directly to the Constant feeding it.

    Used when the zero point is 0 and the scale is 1 or, if _ALSO_MATCH_PO2 is set,
    a scalar power of 2.
    '''

    def match(self, node):
        '''Accept Quant nodes on a Constant whose parameters are folded, zero offset, unit or po2 scale.'''
        if not isinstance(node, Quant):
            return False
        if not isinstance(node.get_input_node(node.inputs[0]), Constant):
            return False
        # only matches after the other inputs are already folded into attributes
        if node.get_input_node(node.inputs[1]) or node.get_input_node(node.inputs[2]):
            return False
        if node.get_input_node(node.inputs[3]):
            return False

        scale = node.get_attr("scale")
        bias = node.get_attr("zeropt")
        zero_offset = (bias == np.zeros_like(bias)).all()

        # the scale has to be all ones, or optionally a scalar power of two
        scale_ok = (scale == np.ones_like(scale)).all()
        if not scale_ok and _ALSO_MATCH_PO2:
            squeezed = np.squeeze(scale)
            if not squeezed.shape:
                # scalar: a mantissa of exactly 0.5 means a power of two
                mantissa, _ = np.frexp(squeezed)
                scale_ok = mantissa == 0.5

        return zero_offset and scale_ok

    def transform(self, model, node):
        '''Attach precision and quantizer to the Constant, requantize it, and remove the Quant node.'''
        scale = node.get_attr("scale")
        bitwidth = node.get_attr("bitwidth")

        if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all():
            # a power-of-two scale only moves the binary point
            _, exp = np.frexp(np.squeeze(scale))
            integer = bitwidth + exp - 1
        else:
            integer = bitwidth

        precision, quantizer = _calculate_precision_quantizer(
            bitwidth, integer, node.get_attr("signed"), node.get_attr("narrow"), node.get_attr("rounding_mode")
        )

        target = node.get_input_node(node.inputs[0])
        target.set_attr("quant_precision", precision)
        target.set_attr("quantizer", quantizer)

        # reinitialize (which also runs quantization if quantizer exists)
        target.initialize()

        # the Quant node has done its job: remove it and rewire around it
        model.remove_node(node, rewire=True)

        return True
+ ''' + + def match(self, node): + # only matches after the other inputs are already folded + is_match = ( + isinstance(node, Quant) + and not isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + if is_match: # to make sure this is a quant node with inputs + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) + return is_match + + def transform(self, model, node): + ''' + Change quant node to ApplyAlhpa, Activation, ApplyAlpha + ''' + + # Do the Activation as in the simple case + + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) + + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + + new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) + new_node.get_output_variable().type.precision = precision + model.replace_node(node, new_node) + + # but now add the ApplyAlhpas before and after + + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + + attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_scale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) + + attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) + + firstscale = 1 / scale + firstbias = bias + 
attributes_scale["scale_data"] = firstscale + attributes_scale["bias_data"] = firstbias + + scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) + model.insert_node(scale_node) + + rescale = scale + rebias = -bias * scale + attributes_rescale["scale_data"] = rescale + attributes_rescale["bias_data"] = rebias + + rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) + model.insert_node(rescale_node) + + return True + + +class ConstQuantToConstAlpha(OptimizerPass): + ''' + This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input + consts allows for optimization, so the ApplyAlpha (to scale), Activation are + optimized away right away. + ''' + + def match(self, node): + # only matches after the other inputs are already folded + is_match = ( + isinstance(node, Quant) + and isinstance(node.get_input_node(node.inputs[0]), Constant) + and not node.get_input_node(node.inputs[1]) + and not node.get_input_node(node.inputs[2]) + and not node.get_input_node(node.inputs[3]) + ) + + if is_match: # to make sure this is a quant node with inputs + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) + return is_match + + def transform(self, model, node): + ''' + Change Constant + Quant node to Constant, ApplyAlpha + ''' + + # Do the Activation as in the simple case + + input_shape = node.get_input_variable().shape + + n_in = np.prod(input_shape) + + rounding_mode = node.get_attr("rounding_mode") + narrow = node.get_attr("narrow") + signed = node.get_attr("signed") + bitwidth = node.get_attr("bitwidth") + + precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) + + const_node = 
node.get_input_node(node.inputs[0]) + + scale = node.get_attr("scale") + bias = node.get_attr("zeropt") + + # caclucate the new value + new_val = const_node.value / scale + bias + const_node.set_attr('value', new_val) + const_node.set_attr("quant_precision", precision) + const_node.set_attr("quantizer", quantizer) + + # reinitialize (which also runs quantization if quantizer exists) + const_node.initialize() + + attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) + + rescale = scale + rebias = -bias * scale + attributes_rescale["scale_data"] = rescale + attributes_rescale["bias_data"] = rebias + + rescale_node = model.make_node( + ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] + ) + model.replace_node(node, rescale_node) + + return True + + +def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode): + ''' + A function to determine the precision and quantizer + ''' + if rounding_mode == "ROUND": + bn_round = "AP_RND_CONV" + elif rounding_mode == "FLOOR": + bn_round = "AP_TRN" + else: + raise NotImplementedError( + f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported." 
+ ) + + if narrow and not signed: + raise NotImplementedError("Narrow mode is only supported for singed numbers.") + + if narrow: + bn_sat = "AP_SAT_SYM" + else: + bn_sat = "AP_SAT" + + bitwidth = math.ceil(bitwidth) + integer = math.ceil(integer) + + precision = FixedPrecisionType(bitwidth, integer, signed, bn_round, bn_sat) + quantizer = QuantNodeQuantizer(precision) + return (precision, quantizer) diff --git a/hls4ml/model/optimizer/passes/reshape_const.py b/hls4ml/model/optimizer/passes/reshape_const.py new file mode 100644 index 0000000000..0012b2761e --- /dev/null +++ b/hls4ml/model/optimizer/passes/reshape_const.py @@ -0,0 +1,27 @@ +from hls4ml.model.layers import Constant, Reshape +from hls4ml.model.optimizer import OptimizerPass + + +class ReshapeConstant(OptimizerPass): + """ + ONNX has the target shape come as an input, not a parameter. This removes + the Constant input from new shape input. (Non-constant inputs are not supported.) + The constant value was already used; this is just a cleanup uptimization. + """ + + def match(self, node): + is_match = isinstance(node, Reshape) and len(node.inputs) > 1 and node.get_input_node(node.inputs[1]) + + return is_match + + def transform(self, model, node): + """ + Remove Constant from new shape input. 
Note, input shape node is already used on initialize + """ + shape_node = node.get_input_node(node.inputs[1]) + node.inputs[1] = '' + if not isinstance(shape_node, Constant): + raise RuntimeError("Nonconstant shape inputs are not currently supported") + model.remove_node(shape_node, rewire=False) + + return True From 5cea82d1bf0b6b82c3302bda6c7f482d603d8937 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 11:49:57 -0500 Subject: [PATCH 06/62] snapshot that runs qonnx test, but gets incorrect results --- hls4ml/backends/fpga/fpga_backend.py | 12 +++- hls4ml/model/layers.py | 5 ++ hls4ml/model/optimizer/__init__.py | 10 +++ .../model/optimizer/passes/batchnorm_opt.py | 2 +- hls4ml/model/optimizer/passes/merge_const.py | 16 ++--- ...recision.py => propagate_acc_precision.py} | 53 ++++++++++---- .../passes/propagate_dense_precision.py | 70 ------------------- 7 files changed, 74 insertions(+), 94 deletions(-) rename hls4ml/model/optimizer/passes/{propagate_conv_precision.py => propagate_acc_precision.py} (59%) delete mode 100644 hls4ml/model/optimizer/passes/propagate_dense_precision.py diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index 97e458f7fd..27620b1949 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -25,6 +25,7 @@ GlobalPooling1D, GlobalPooling2D, MatMul, + Merge, Pooling1D, Pooling2D, Quant, @@ -76,7 +77,16 @@ def __init__(self, name): attrs.append(TypeAttribute('accum')) self.attribute_map[layer] = attrs - rf_layers = accum_layers + [BatchNormalization, Activation, Embedding, GarNet, GarNetStack, Quant, BatchNormOnnx] + rf_layers = accum_layers + [ + BatchNormalization, + Activation, + Embedding, + GarNet, + GarNetStack, + Quant, + BatchNormOnnx, + Merge, + ] for layer in rf_layers: attrs = self.attribute_map.get(layer, []) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index bd465ff7b9..1922dcec8c 100644 --- a/hls4ml/model/layers.py 
+++ b/hls4ml/model/layers.py @@ -249,6 +249,11 @@ def add_output_variable( self.set_attr(out_name, out) + def update_output_precision(self, precision, output_name=None): + if output_name is None: + output_name = self.outputs[0] + self.variables[output_name].type.precision = precision + def add_weights(self, quantizer=None, compression=False): self.add_weights_variable( name='weight', var_name='w{index}', data='weight', quantizer=quantizer, compression=compression diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index db65370e40..38844992db 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -43,6 +43,16 @@ 'fuse_quant_with_constant', 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', + 'batch_norm_onnx_constant_parameters', + 'constant_batch_norm_fusion', + 'merge_two_constants', + 'scale_down_add', + 'scale_down_mat_mul', + 'scale_down_weight_conv', + 'scale_down_bias_conv', + 'scale_down_conv', + 'merge_to_apply_alpha', + 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', 'output_rounding_saturation_mode', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index a7b0c27209..b9c651fd8f 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -3,7 +3,7 @@ from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass -_base_attributes = ('Trace', 'reuse_factor', 'n_in', 'n_filt') +_base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') class BatchNormOnnxConstantParameters(OptimizerPass): diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 4e339ccc3f..da70eb55f3 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,7 +1,7 @@ import numpy as np 
from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer -from hls4ml.model.layers import BatchNormalization, Constant, Merge +from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass _base_attributes = ('Trace', 'reuse_factor', 'n_in') @@ -69,8 +69,8 @@ def transform(self, model, node): return True -class MergeToBatchNormalization(OptimizerPass): - """Convert Add, Sub, Mul, or Div Merges with consant to BatchNormalization""" +class MergeToApplyAlpha(OptimizerPass): + """Convert Add, Sub, Mul, or Div Merges with consant to ApplyAlpha""" def match(self, node): is_match = ( @@ -147,7 +147,7 @@ def transform(self, model, node): ) bn_layer = model.make_node( - BatchNormalization, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -156,9 +156,9 @@ def transform(self, model, node): return True -class MergeToBatchNormalizationDiv(OptimizerPass): +class MergeToApplyAlphaDiv(OptimizerPass): """ - Convert Div Merges with consant to BatchNormalization + Convert Div Merges with consant to ApplyAlpha TODO: propagate precision """ @@ -182,9 +182,7 @@ def transform(self, model, node): attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) - bn_layer = model.make_node( - "BatchNormalization", f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] - ) + bn_layer = model.make_node(ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) model.replace_node(node, bn_layer) diff --git a/hls4ml/model/optimizer/passes/propagate_conv_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py similarity index 59% rename 
from hls4ml/model/optimizer/passes/propagate_conv_precision.py rename to hls4ml/model/optimizer/passes/propagate_acc_precision.py index 17e357df88..6c1facc23b 100644 --- a/hls4ml/model/optimizer/passes/propagate_conv_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_acc_precision.py @@ -2,11 +2,43 @@ import numpy as np -from hls4ml.model.layers import Conv1D, Conv2D +from hls4ml.model.layers import Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import FixedPrecisionType, NamedType +class PropagateDensePrecision(OptimizerPass): + """ + Propagate precision for Dense nodes. Restrict it to only cases where + the precision is set by a quant node, since otherwise the values get huge. + """ + + def match(self, node): + is_match = isinstance(node, Dense) + return is_match + + def transform(self, model, node): + input_precision = node.get_input_node().get_attr("quant_precision") + weight_precision = node.get_attr("weight_precision") + if not input_precision or not weight_precision: + return False + + bias_precision = node.get_attr("bias_precision") + input_variable = node.get_input_variable() + num_acc = input_variable.shape[-1] + + accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) + + accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) + node.set_attr('accum_t', accum_t) + + if not node.get_attr("quant_precision"): + # output precision not set by quant node + node.update_output_precision(accum_precision) + + return False + + class PropagateConvPrecision(OptimizerPass): """Propagate precision for conv nodes. Restrict it to only cases where the precision is set by a quant node, since otherwise the values get huge. 
@@ -27,14 +59,9 @@ def transform(self, model, node): filt_width = node.get_attr('filt_width') filt_height = node.get_attr('filt_height', 1) - accum_precision = _propagate_type_conv( - input_precision, - weight_precision, - bias_precision, - num_feature_maps=num_feature_maps, - filt_width=filt_width, - filt_height=filt_height, - ) + num_acc = filt_width * filt_height * num_feature_maps + + accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) node.set_attr('accum_t', accum_t) @@ -46,14 +73,14 @@ def transform(self, model, node): return False -def _propagate_type_conv(input_precision, weight_precision, bias_precision, num_feature_maps, filt_width, filt_height): +def _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc): ''' Propagate the precion type across a multiply. Rounding modes are propagated from input_precision ''' - Nacc = filt_width * filt_height * num_feature_maps - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(Nacc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(Nacc)) + # check to make sure none are None + bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) + integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) signed = weight_precision.signed or input_precision.signed # Because calculating precision, no need to round or sautration diff --git a/hls4ml/model/optimizer/passes/propagate_dense_precision.py b/hls4ml/model/optimizer/passes/propagate_dense_precision.py deleted file mode 100644 index cc50bb7553..0000000000 --- a/hls4ml/model/optimizer/passes/propagate_dense_precision.py +++ /dev/null @@ -1,70 +0,0 @@ -import math # prefer to use math.ceil for scalar values (returns int) - -import numpy as np - -from hls4ml.model.layers import Dense -from hls4ml.model.optimizer import 
OptimizerPass -from hls4ml.model.types import FixedPrecisionType, NamedType - - -class PropagateDensePrecision(OptimizerPass): - """ - Propagate precision for Dense nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. - """ - - def match(self, node): - is_match = isinstance(node, Dense) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - input_variable = node.get_input_variable() - num_acc = input_variable.shape[-1] - - accum_precision = _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not set by quant node - node.update_output_precision(accum_precision) - - return False - - -def _propagate_type_dense(input_precision, weight_precision, bias_precision, num_acc): - ''' - Propagate the precion type across a multiply. 
Rounding modes are propagated from input_precision - ''' - - # check to make sure none are None - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) - signed = weight_precision.signed or input_precision.signed - - # Because calculating precision, no need to round or sautration - rounding_mode = None - saturation_mode = None - - frac = bitwidth - integer - - # correct for bias - if bias_precision: - integer = ( - max( - integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed), - ) - + 1 - ) - bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) - signed = signed or bias_precision.signed - - return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) From d5394d4e59046daa6069ca22c8e3aec9ad934db7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 11:55:33 -0500 Subject: [PATCH 07/62] add quant node quantizer --- hls4ml/converters/onnx/quantizer.py | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 hls4ml/converters/onnx/quantizer.py diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py new file mode 100644 index 0000000000..7f69652c04 --- /dev/null +++ b/hls4ml/converters/onnx/quantizer.py @@ -0,0 +1,97 @@ +""" +Quantizer for the Quant node, after scale and zeropoint hafe been extracted +(unless scale is a power of 2, if doing special case po2) + +This is based on the sample implementation in finn-base +""" + +import numpy as np + +from hls4ml.model.types import Quantizer, RoundingMode, SaturationMode + + +class QuantNodeQuantizer(Quantizer): + """This implements a quantizer for a FixedPrecisionType with width==integer""" + + def __init__(self, precision): + super().__init__(precision.width, precision) + + def __call__(self, data): + """Apply the 
quantization on the data""" + + scale = 2 ** (self.hls_type.width - self.hls_type.integer) + + data = data * scale # (not using *= to avoid modifying data) + # Clamping + min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) + max_int_val = self._max_int(self.hls_type.signed, self.bits) + data = np.where(data > max_int_val, max_int_val, data) + data = np.where(data < min_int_val, min_int_val, data) + # Rounding + rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) + return rounding_fx(data) / scale + + @staticmethod + def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: + """Compute the minimum integer representable by a given number of bits. + Args: + signed (bool): Indicates whether the represented integer is signed or not. + saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) + bit_width (int): Number of bits available for the representation. + Returns: + int: Maximum unsigned integer that can be represented according to + the input arguments. + Examples: + >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) + int(-127) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) + int(-128) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + """ + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): + raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + if signed and saturation_mode == SaturationMode.SAT_SYM: + value = -(2 ** (bit_width - 1)) + 1 + elif signed: + value = -(2 ** (bit_width - 1)) + else: + value = 0 + return value + + @staticmethod + def _max_int(signed: bool, bit_width: int) -> int: + """Compute the maximum integer representable by a given number of bits. 
+ (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) + Args: + signed (bool): Indicates whether the represented integer is signed or not. + bit_width (int): Number of bits available for the representation. + Returns: + Tensor: Maximum integer that can be represented according to + the input arguments. + Examples: + >>> max_int(signed=True, bit_width=8) + int(127) + >>> max_int(signed=False, bit_width=8) + int(255) + """ + if not signed: + value = (2**bit_width) - 1 + else: + value = (2 ** (bit_width - 1)) - 1 + return value + + @staticmethod + def _resolve_rounding_mode(mode): + """Resolve the rounding mode of Quant and Trunc ops + to the corresponding numpy functions.""" + if mode == RoundingMode.RND_CONV: + return np.round + # elif mode_string == "CEIL": # not supported + # return np.ceil + elif mode == RoundingMode.TRN: + return np.floor + else: + raise ValueError(f"Rounding mode {mode} not supported.") From 9817ed36f034f73030a6a5820a451f4199812641 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 15:14:53 -0500 Subject: [PATCH 08/62] fix broadcasting when going from Merge to ApplyAlpha --- hls4ml/model/optimizer/passes/merge_const.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index da70eb55f3..f38bfd841d 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -131,6 +131,12 @@ def transform(self, model, node): scale_precision = const_node.get_attr("quant_precision") scale_quantizer = const_node.get_attr("quantizer") + # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias + if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): + scale = np.broadcast_to(scale, input_shape) + if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != 
tuple(input_shape): + bias = np.broadcast_to(bias, input_shape) + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { @@ -179,6 +185,12 @@ def transform(self, model, node): scale = 1 / const_node.value bias = np.array(0) + # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias + if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): + scale = np.broadcast_to(scale, input_shape) + if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): + bias = np.broadcast_to(bias, input_shape) + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) From e494f435b55f396e2bf8d3c8c1350f5fa753fbb3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 15:49:56 -0500 Subject: [PATCH 09/62] update linear merging --- hls4ml/model/optimizer/__init__.py | 1 + hls4ml/model/optimizer/passes/linear.py | 42 +++++++++++++++++++++++++ hls4ml/model/optimizer/passes/nop.py | 14 --------- 3 files changed, 43 insertions(+), 14 deletions(-) create mode 100644 hls4ml/model/optimizer/passes/linear.py delete mode 100644 hls4ml/model/optimizer/passes/nop.py diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 38844992db..e41973b4e2 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -70,6 +70,7 @@ 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', 'set_precision_concat', + 'merge_linear_activation', ], requires=['convert'], ) diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py new file mode 100644 index 0000000000..72d6dade9f --- /dev/null +++ b/hls4ml/model/optimizer/passes/linear.py @@ -0,0 +1,42 @@ +from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense 
+from hls4ml.model.optimizer import OptimizerPass + + +class EliminateLinearActivation(OptimizerPass): + def match(self, node): + cast = False + if isinstance(node, Activation): + cast = node.get_input_variable().type.precision != node.get_output_variable().type.precision + return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast + + def transform(self, model, node): + model.remove_node(node) + return True + + +# TODO: Move migrate this to auto precisoin check from quant precision check +class MergeLinearActivation(OptimizerPass): + ''' + For many objects it's safe to change the output precision independently of the calculation. + ''' + + def match(self, node): + ''' + Only match if the parent is safe and the precision is not explicitly set. + ''' + if isinstance(node, Activation) and node.get_attr('activation') == 'linear': + parent = node.get_input_node(node.inputs[0]) + safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) + parent_type_fixed = parent.get_attr("quant_precision") + return safe_parent and not parent_type_fixed + else: + return False + + def transform(self, model, node): + prev_node = node.get_input_node(node.inputs[0]) + quant_precision = node.get_attr("quant_precision") + prev_node.set_attr("quant_precision", quant_precision) + prev_node.set_attr("quantizer", node.get_attr("quantizer")) + prev_node.update_output_precision(quant_precision) + model.remove_node(node) + return True diff --git a/hls4ml/model/optimizer/passes/nop.py b/hls4ml/model/optimizer/passes/nop.py deleted file mode 100644 index 55fcf16e93..0000000000 --- a/hls4ml/model/optimizer/passes/nop.py +++ /dev/null @@ -1,14 +0,0 @@ -from hls4ml.model.layers import Activation -from hls4ml.model.optimizer import OptimizerPass - - -class EliminateLinearActivation(OptimizerPass): - def match(self, node): - cast = False - if isinstance(node, Activation): - cast = node.get_input_variable().type.precision != 
node.get_output_variable().type.precision - return isinstance(node, Activation) and node.get_attr('activation') == 'linear' and not cast - - def transform(self, model, node): - model.remove_node(node) - return True From ffddb5e898a7689cf73cdaf50ca118c4104f3c35 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 16:25:03 -0500 Subject: [PATCH 10/62] update automatic setting of accumulators (QONNX-only for now) --- hls4ml/model/optimizer/__init__.py | 2 ++ hls4ml/model/optimizer/passes/propagate_acc_precision.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index e41973b4e2..6af9698a51 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -69,6 +69,8 @@ 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'replace_multidimensional_dense_with_conv', + 'propagate_dense_precision', + 'propagate_conv_precision', 'set_precision_concat', 'merge_linear_activation', ], diff --git a/hls4ml/model/optimizer/passes/propagate_acc_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py index 6c1facc23b..375979de4e 100644 --- a/hls4ml/model/optimizer/passes/propagate_acc_precision.py +++ b/hls4ml/model/optimizer/passes/propagate_acc_precision.py @@ -6,6 +6,8 @@ from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.types import FixedPrecisionType, NamedType +# TODO: Update these to use the new auto precision, not depdening only on QONNX values + class PropagateDensePrecision(OptimizerPass): """ From 57c89fb7da6cebdd8d8fe4e72ea6a31ea0c1a16a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jul 2023 17:39:47 -0500 Subject: [PATCH 11/62] update qonnx tests --- test/pytest/test_qonnx.py | 144 ++++++++++++++------------------------ 1 file changed, 54 insertions(+), 90 deletions(-) mode change 100755 => 100644 test/pytest/test_qonnx.py diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py 
old mode 100755 new mode 100644 index be567d81f9..535bffb0da --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import os import urllib from pathlib import Path @@ -17,8 +16,11 @@ test_root_path = Path(__file__).parent -def test_tfc_2w2a(): - # download test model +@pytest.fixture(scope='module') +def tfc_2w2a_model(): + ''' + Load the tiny fully-connected model + ''' dl_dir = test_root_path dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") tfc_w2a2_qonnx_url = ( @@ -32,50 +34,60 @@ def test_tfc_2w2a(): # cleanup qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) model = ModelWrapper(out_file) + return model - # Execute QONNX model inference - # TODO make the test bigger - ishape = (1, 1, 28, 28) - np.random.seed(0) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) - idict = {model.graph.input[0].name: X} - y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] - # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} - hls_model = hls4ml.converters.convert_from_onnx_model( - model, output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a'), part='xcu250-figd2104-2L-e', hls_config=config +@pytest.fixture(scope='module') +def cnv_2w2a_model(): + ''' + Load the small convolution model + ''' + dl_dir = test_root_path + dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") + cnv_w2a2_qonnx_url = ( + "https://raw.githubusercontent.com/fastmachinelearning/" + "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" ) - hls_model.compile() - y_hls4ml = hls_model.predict(X) + urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) + assert os.path.isfile(dl_file) + out_clean = str(dl_dir / 
"qonnx-cnv-2w2a-clean.onnx") + out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") + out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") - np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) + qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) + qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) + model = ModelWrapper(out_file) + return model -def test_tfc_2w2a_quartus(): - # download test model +@pytest.fixture(scope='module') +def jettagging_model(): + ''' + Load the 3 hidden layer QKeras example model trained on the jet tagging dataset + ''' dl_dir = test_root_path - dl_file = str(dl_dir / "qonnx-tfc-2w2a.onnx") - tfc_w2a2_qonnx_url = ( + dl_file = str(dl_dir / "qkeras_jettagging.onnx") + jet_tagging_qonnx_url = ( "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/MNIST/Brevitas_FINN_TFC/TFC/TFC_2W2A.onnx" + "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" ) - urllib.request.urlretrieve(tfc_w2a2_qonnx_url, dl_file) + urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) assert os.path.isfile(dl_file) - out_file = str(dl_dir / "qonnx-tfc-2w2a-clean.onnx") + out_file = str(dl_dir / "qkeras_jettagging-clean.onnx") # cleanup qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) model = ModelWrapper(out_file) + return model + + +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_tfc_2w2a(tfc_2w2a_model, backend): + model = tfc_2w2a_model - # Execute QONNX model inference - # TODO make the test bigger ishape = (1, 1, 28, 28) - np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -83,15 
+95,10 @@ def test_tfc_2w2a_quartus(): # Convert QONNX model, compile, and run inference config = hls4ml.utils.config_from_onnx_model(model) # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ac_fixed<16,2>'} + config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} hls_model = hls4ml.converters.convert_from_onnx_model( - model, - output_dir=str(test_root_path / 'hls4mlprj_qonnx_tfc-2w2a-quartus'), - part='Arria10', - backend='Quartus', - hls_config=config, + model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_tfc-2w2a_{backend}'), backend=backend, hls_config=config ) hls_model.compile() y_hls4ml = hls_model.predict(X) @@ -99,45 +106,22 @@ def test_tfc_2w2a_quartus(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -def test_cnv_2w2a(): - # download test model - dl_dir = test_root_path - dl_file = str(dl_dir / "qonnx-cnv-2w2a.onnx") - cnv_w2a2_qonnx_url = ( - "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/CIFAR10/Brevitas_FINN_CNV/CNV_2W2A.onnx" - ) - urllib.request.urlretrieve(cnv_w2a2_qonnx_url, dl_file) - assert os.path.isfile(dl_file) - out_clean = str(dl_dir / "qonnx-cnv-2w2a-clean.onnx") - out_chanlast = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last.onnx") - out_file = str(dl_dir / "qonnx-cnv-2w2a-clean-channels-last-clean.onnx") - - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_clean) - qonnx.util.to_channels_last.to_channels_last(out_clean, make_input_channels_last=True, out_file=out_chanlast) - qonnx.util.cleanup.cleanup(out_chanlast, out_file=out_file) - model = ModelWrapper(out_file) +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_cnv_2w2a(cnv_2w2a_model, backend): + model = cnv_2w2a_model - # Execute QONNX model inference - # TODO make the test bigger ishape = (1, 32, 32, 3) - 
np.random.seed(1) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) - config['Model']['Precision'] = 'ap_fixed<32,16>' - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation - + config = hls4ml.utils.config_from_onnx_model(model, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, - output_dir=str(test_root_path / 'hls4mlprj_qonnx_cnv-2w2a'), - part='xcu250-figd2104-2L-e', + output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), io_type='io_stream', + backend=backend, hls_config=config, ) hls_model.compile() @@ -146,35 +130,19 @@ def test_cnv_2w2a(): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -@pytest.mark.parametrize('backend', ['Vivado', 'Quartus']) -def test_jet_tagging(backend): - # download test model - dl_dir = test_root_path - dl_file = dl_dir / "qkeras_jettagging.onnx" - jet_tagging_qonnx_url = ( - "https://raw.githubusercontent.com/fastmachinelearning/" - "QONNX_model_zoo/main/models/JetTagging/QKeras_hls4ml_3layer/qkeras_jettagging.onnx" - ) - urllib.request.urlretrieve(jet_tagging_qonnx_url, dl_file) - assert os.path.isfile(dl_file) - out_file = dl_dir / "qkeras_jettagging-clean.onnx" - - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) - model = ModelWrapper(out_file) +@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +def test_jet_tagging(jettagging_model, backend): + model = jettagging_model # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - np.random.seed(0) X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) idict = 
{model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference config = hls4ml.utils.config_from_onnx_model(model) - # Some hand-derived config - # TODO should be auto-derived by QuantizeDenseOutput pass after some adaptation hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config @@ -183,7 +151,3 @@ def test_jet_tagging(backend): y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) - - -if __name__ == '__main__': - test_tfc_2w2a() From 233905a0dac338e720a114ec2671aca1a2cd64f4 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 18 Jul 2023 11:32:37 -0500 Subject: [PATCH 12/62] remove batch dimension from flatten in Keras --- hls4ml/converters/keras/reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/converters/keras/reshape.py b/hls4ml/converters/keras/reshape.py index bd9d519a2a..1f6dc2a759 100644 --- a/hls4ml/converters/keras/reshape.py +++ b/hls4ml/converters/keras/reshape.py @@ -11,8 +11,8 @@ def parse_flatten_layer(keras_layer, input_names, input_shapes, data_reader): layer = parse_default_keras_layer(keras_layer, input_names) layer['class_name'] = 'Reshape' - layer['target_shape'] = [input_shapes[0][0], np.prod(input_shapes[0][1:])] - output_shape = layer['target_shape'] + layer['target_shape'] = [np.prod(input_shapes[0][1:])] # target shape has no batch dimension + output_shape = input_shapes[0][:1] + layer['target_shape'] return layer, output_shape From 6f119551c9586ada7cdb6e9c64c5956b1198023c Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 3 Aug 2023 17:15:26 -0500 Subject: [PATCH 13/62] fix optimizer that fuses consecutive batch norms --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index b9c651fd8f..a74047676d 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -162,8 +162,8 @@ def transform(self, model, node): bias_new = s1 * b0 + b1 # call function so that quantizer would be called if needed - node.add_weights(scale_new, quantizer=s_quantizer) - node.add_bias(bias_new, quantizer=b_quantizer) + node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new) + node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new) model.remove_node(prev_node, rewire=True) return True From 76be67b5779b38486a094b465898e087fa9e3339 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 2 Feb 2024 18:52:57 -0600 Subject: [PATCH 14/62] snapshot of work --- hls4ml/converters/keras/core.py | 3 +- hls4ml/converters/keras/graph.py | 2 +- hls4ml/converters/keras/qkeras.py | 3 +- hls4ml/converters/onnx/quantizer.py | 97 ------- hls4ml/model/layers.py | 7 +- hls4ml/model/optimizer/passes/merge_const.py | 2 +- .../passes/propagate_acc_precision.py | 106 ------- hls4ml/model/optimizer/passes/qkeras.py | 3 +- hls4ml/model/optimizer/passes/quant_opt.py | 229 +++++++-------- hls4ml/model/quantizers.py | 261 ++++++++++++++++++ hls4ml/model/types.py | 156 ----------- test/pytest/test_qonnx.py | 6 +- 12 files changed, 378 insertions(+), 497 deletions(-) delete mode 100644 hls4ml/converters/onnx/quantizer.py delete mode 100644 hls4ml/model/optimizer/passes/propagate_acc_precision.py create mode 100644 hls4ml/model/quantizers.py diff --git a/hls4ml/converters/keras/core.py b/hls4ml/converters/keras/core.py index f6119c016d..ca7d0b3541 100644 --- a/hls4ml/converters/keras/core.py +++ b/hls4ml/converters/keras/core.py @@ -1,5 +1,6 @@ from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer -from hls4ml.model.types import BinaryQuantizer, 
IntegerPrecisionType, TernaryQuantizer +from hls4ml.model.quantizers import BinaryQuantizer, TernaryQuantizer +from hls4ml.model.types import IntegerPrecisionType @keras_handler('InputLayer') diff --git a/hls4ml/converters/keras/graph.py b/hls4ml/converters/keras/graph.py index 5c5c2247c0..954bf20b8f 100644 --- a/hls4ml/converters/keras/graph.py +++ b/hls4ml/converters/keras/graph.py @@ -1,5 +1,5 @@ -from hls4ml.converters.keras.core import TernaryQuantizer from hls4ml.converters.keras_to_hls import get_weights_data, keras_handler, parse_default_keras_layer +from hls4ml.model.quantizers import TernaryQuantizer @keras_handler('GarNet', 'GarNetStack') diff --git a/hls4ml/converters/keras/qkeras.py b/hls4ml/converters/keras/qkeras.py index ba1401cce0..055ed3a8f4 100644 --- a/hls4ml/converters/keras/qkeras.py +++ b/hls4ml/converters/keras/qkeras.py @@ -3,7 +3,8 @@ from hls4ml.converters.keras.convolution import parse_conv1d_layer, parse_conv2d_layer from hls4ml.converters.keras.core import parse_batchnorm_layer, parse_dense_layer from hls4ml.converters.keras_to_hls import keras_handler, parse_default_keras_layer -from hls4ml.model.types import FixedPrecisionType, QKerasBinaryQuantizer, QKerasPO2Quantizer, QKerasQuantizer +from hls4ml.model.quantizers import QKerasBinaryQuantizer, QKerasPO2Quantizer, QKerasQuantizer +from hls4ml.model.types import FixedPrecisionType def get_quantizer_from_config(keras_layer, quantizer_var): diff --git a/hls4ml/converters/onnx/quantizer.py b/hls4ml/converters/onnx/quantizer.py deleted file mode 100644 index 7f69652c04..0000000000 --- a/hls4ml/converters/onnx/quantizer.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Quantizer for the Quant node, after scale and zeropoint hafe been extracted -(unless scale is a power of 2, if doing special case po2) - -This is based on the sample implementation in finn-base -""" - -import numpy as np - -from hls4ml.model.types import Quantizer, RoundingMode, SaturationMode - - -class QuantNodeQuantizer(Quantizer): 
- """This implements a quantizer for a FixedPrecisionType with width==integer""" - - def __init__(self, precision): - super().__init__(precision.width, precision) - - def __call__(self, data): - """Apply the quantization on the data""" - - scale = 2 ** (self.hls_type.width - self.hls_type.integer) - - data = data * scale # (not using *= to avoid modifying data) - # Clamping - min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) - max_int_val = self._max_int(self.hls_type.signed, self.bits) - data = np.where(data > max_int_val, max_int_val, data) - data = np.where(data < min_int_val, min_int_val, data) - # Rounding - rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) - return rounding_fx(data) / scale - - @staticmethod - def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: - """Compute the minimum integer representable by a given number of bits. - Args: - signed (bool): Indicates whether the represented integer is signed or not. - saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) - bit_width (int): Number of bits available for the representation. - Returns: - int: Maximum unsigned integer that can be represented according to - the input arguments. - Examples: - >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) - int(-127) - >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) - int(0) - >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) - int(-128) - >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) - int(0) - """ - if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): - raise ValueError(f"Saturation mode {saturation_mode} not supported. 
Only AP_SAT_SYM, AP_SAT supported") - if signed and saturation_mode == SaturationMode.SAT_SYM: - value = -(2 ** (bit_width - 1)) + 1 - elif signed: - value = -(2 ** (bit_width - 1)) - else: - value = 0 - return value - - @staticmethod - def _max_int(signed: bool, bit_width: int) -> int: - """Compute the maximum integer representable by a given number of bits. - (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) - Args: - signed (bool): Indicates whether the represented integer is signed or not. - bit_width (int): Number of bits available for the representation. - Returns: - Tensor: Maximum integer that can be represented according to - the input arguments. - Examples: - >>> max_int(signed=True, bit_width=8) - int(127) - >>> max_int(signed=False, bit_width=8) - int(255) - """ - if not signed: - value = (2**bit_width) - 1 - else: - value = (2 ** (bit_width - 1)) - 1 - return value - - @staticmethod - def _resolve_rounding_mode(mode): - """Resolve the rounding mode of Quant and Trunc ops - to the corresponding numpy functions.""" - if mode == RoundingMode.RND_CONV: - return np.round - # elif mode_string == "CEIL": # not supported - # return np.ceil - elif mode == RoundingMode.TRN: - return np.floor - else: - raise ValueError(f"Rounding mode {mode} not supported.") diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 0df69b753e..7da730b60a 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -349,17 +349,17 @@ def initialize(self): class Constant(Layer): + # one could consider making this a weight attribute, but given it's transient nature, I am not sure it helps _expected_attributes = [ Attribute('value', value_type=np.ndarray), ] def initialize(self): value = self.attributes['value'] - self.value = value # note, this is unquantized; Only here for easier access shape = list(value.shape) if not shape: shape = (1,) - self.value = np.array([self.value]) + self.set_attr('value', 
np.array([value])) dims = [f'{self.name}_{i}' for i in range(len(shape))] self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) @@ -455,7 +455,6 @@ class Conv(Layer): """ def initialize(self): - # use negative indexing because it is not clear if batch dimension is always stripped if self.attributes['n_dim'] == 1: # this is 1D convolution shape = [self.attributes['out_width'], self.attributes['n_filt']] @@ -932,6 +931,7 @@ def initialize(self): self.add_weights_variable(name='bias', var_name='b{index}', data=bias) +# TODO: discuss whether this should be renamed to soemthing more descriptive, and whether the class hierarchy makes sense class ApplyAlpha(BatchNormalization): '''A custom layer to scale the output of a QDense layer which used 'alpha != 1' Inference computation uses BatchNormalization methods''' @@ -941,6 +941,7 @@ def initialize(self): shape = inp.shape dims = inp.dim_names self.add_output_variable(shape, dims) + self.set_attr('n_in', inp.size()) scale = self.get_attr('scale_data') scale_quantizer = self.get_attr('scale_quantizer') diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index f38bfd841d..adc7dff093 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -1,8 +1,8 @@ import numpy as np -from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer _base_attributes = ('Trace', 'reuse_factor', 'n_in') diff --git a/hls4ml/model/optimizer/passes/propagate_acc_precision.py b/hls4ml/model/optimizer/passes/propagate_acc_precision.py deleted file mode 100644 index 375979de4e..0000000000 --- a/hls4ml/model/optimizer/passes/propagate_acc_precision.py +++ /dev/null @@ -1,106 +0,0 @@ -import math # prefer to use math.ceil for scalar values 
(returns int) - -import numpy as np - -from hls4ml.model.layers import Conv1D, Conv2D, Dense -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType, NamedType - -# TODO: Update these to use the new auto precision, not depdening only on QONNX values - - -class PropagateDensePrecision(OptimizerPass): - """ - Propagate precision for Dense nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. - """ - - def match(self, node): - is_match = isinstance(node, Dense) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - input_variable = node.get_input_variable() - num_acc = input_variable.shape[-1] - - accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not set by quant node - node.update_output_precision(accum_precision) - - return False - - -class PropagateConvPrecision(OptimizerPass): - """Propagate precision for conv nodes. Restrict it to only cases where - the precision is set by a quant node, since otherwise the values get huge. 
- """ - - def match(self, node): - is_match = isinstance(node, (Conv1D, Conv2D)) - return is_match - - def transform(self, model, node): - input_precision = node.get_input_node().get_attr("quant_precision") - weight_precision = node.get_attr("weight_precision") - if not input_precision or not weight_precision: - return False - - bias_precision = node.get_attr("bias_precision") - num_feature_maps = node.weights['weight'].data_unquantized.shape[-1] - filt_width = node.get_attr('filt_width') - filt_height = node.get_attr('filt_height', 1) - - num_acc = filt_width * filt_height * num_feature_maps - - accum_precision = _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc) - - accum_t = NamedType(f'layer{node.index}_accum_t', accum_precision) - node.set_attr('accum_t', accum_t) - - if not node.get_attr("quant_precision"): - # output precision not explicitly set by quant node - node.update_output_precision(accum_precision) - - return False - - -def _propagate_type_acc(input_precision, weight_precision, bias_precision, num_acc): - ''' - Propagate the precion type across a multiply. 
Rounding modes are propagated from input_precision - ''' - - # check to make sure none are None - bitwidth = weight_precision.width + input_precision.width + math.ceil(np.log2(num_acc)) - integer = weight_precision.integer + input_precision.integer + math.ceil(np.log2(num_acc)) - signed = weight_precision.signed or input_precision.signed - - # Because calculating precision, no need to round or sautration - rounding_mode = None - saturation_mode = None - - frac = bitwidth - integer - - # correct for bias - if bias_precision: - integer = ( - max( - integer + (bias_precision.signed and not signed), - bias_precision.integer + (signed and not bias_precision.signed), - ) - + 1 - ) - bitwidth = integer + max(frac, bias_precision.width - bias_precision.integer) - signed = signed or bias_precision.signed - - return FixedPrecisionType(bitwidth, integer, signed, rounding_mode, saturation_mode) diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index 7bed6cb1e7..a97438832d 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -3,7 +3,8 @@ from hls4ml.model.layers import ApplyAlpha from hls4ml.model.optimizer import ConfigurableOptimizerPass, OptimizerPass, register_pass -from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, QKerasPO2Quantizer +from hls4ml.model.quantizers import QKerasPO2Quantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType class OutputRoundingSaturationMode(ConfigurableOptimizerPass): diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index f0a5129d52..dc6deab14b 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -1,28 +1,25 @@ -''' +""" This file includes optimizations related to quant nodes. -As a first step, QuantConstantParameters converts the extra inputs to attributes. 
It is always the first step +As a first step, QuantConstantParameters converts the extra inputs to attributes. -The next step differs between the case of (1) unitary scale and zero offset, or (2) nonunitary scale and/or -nonzero offset. In the first case no scaling is required, so a Quant node effectively becomes a linear activation. -For the common case when this is applied on a constant weight, the activation is immediately merged with the weight, -qantizing the weights. In case 2, we need to explictly scale and unscale, so the Quant node becomes 3 nodes, an -ApplyAlpha node to apply a scale/shift, a Linear node to apply the quantization, and another ApplyAlpha to unscale/shift. -We depend on optimization steps to move the unscaling ApplyAlpha down as needed. Again, when the Quant is a applied to a -Constant, the scaling and Linear nodes are immediately merged into the Constant. This is done because it simplifies some -of the other optimizations. +The next step differs between the case of (1) (positive) power-of-2 scale and zero offset, or (2) other cases. In the first +case no explicit scaling is required, so a Quant node logically becomes a linear activation. (Cases when the scale is a +power of 2 not equal to one are implicitly scaled with fixed precision types.) When the activation is applied to a constant +weight, the activation is immediately merged with the weight, quantizing the weights. In case (2), we need to explicitly +scale and unscale, so the Quant node becomes 3 nodes, an ApplyAlpha node to apply a scale/shift, a Linear node to apply the +quantization, and another ApplyAlpha to unscale/shift. We depend on optimization steps to move the unscaling ApplyAlpha +down as needed so that we can do integer or fixed-point calculations. When the Quant is a applied to a weight, the scaling +and Linear nodes are immediately merged into the Constant. 
-UPDATE: Case 1 is loosened to also include power of 2 scalar scales, not just unitary scale, if - _ALSO_INCLUDE_PO2 is set to true (the default) - -''' +""" import math # prefer to use math.ceil for scalar values import numpy as np -from hls4ml.converters.onnx.quantizer import QuantNodeQuantizer from hls4ml.model.layers import Activation, ApplyAlpha, Constant, Quant from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType _ALSO_MATCH_PO2 = True @@ -44,28 +41,29 @@ def match(self, node): def transform(self, model, node): """ - Remove Constant from the Qaunt node parameters (but not input[0]) + Remove Constant from the Quant node parameters (but not input[0]) """ if node.get_input_node(node.inputs[1]): scale_node = node.get_input_node(node.inputs[1]) if isinstance(scale_node, Constant): - node.set_attr('scale', scale_node.value) + node.set_attr('scale', scale_node.get_attr('value')) node.inputs[1] = '' model.remove_node(scale_node, rewire=False) if node.get_input_node(node.inputs[2]): zeropt_node = node.get_input_node(node.inputs[2]) if isinstance(zeropt_node, Constant): - node.set_attr('zeropt', zeropt_node.value) + node.set_attr('zeropt', zeropt_node.get_attr('value')) node.inputs[2] = '' model.remove_node(zeropt_node, rewire=False) if node.get_input_node(node.inputs[3]): bitwidth_node = node.get_input_node(node.inputs[3]) if isinstance(bitwidth_node, Constant): - if np.squeeze(bitwidth_node.value).shape: - raise RuntimeError("Only scalar bitwidth values are supporeted by the Quant node") - node.set_attr('bitwidth', bitwidth_node.value) + bitwidth = bitwidth_node.get_attr('value') + if bitwidth.size != 1: + raise RuntimeError('Only scalar bitwidth values are supporeted by the Quant node') + node.set_attr('bitwidth', bitwidth) node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) @@ -73,14 +71,12 @@ def transform(self, model, node): class 
QuantToActivation(OptimizerPass): - ''' - This is for the case when scale is 1 and zeropt is 0. It is a a 1:1 transformation of + """ + This is for the case when scale is a (positive) power of 2 and zeropt is 0. It is a a 1:1 transformation of a Quant to an Activation. As an optimization, this is not called when the input is constant. - - UPDATE: this is also called when scale is scalar and power of 2, not just 1. - ''' + """ def match(self, node): # only matches after the other inputs are already folded @@ -93,47 +89,43 @@ def match(self, node): and not node.get_input_node(node.inputs[3]) ) - # Only match if the scale is 1s and the zero-point is 0s + # Only match if the scale is power of 2 and the zero-point is 0s if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and (bias == np.zeros_like(bias)).all() # check if scale is ones-like or a power of two scale_unit_or_po2 = (scale == np.ones_like(scale)).all() if not scale_unit_or_po2 and _ALSO_MATCH_PO2: - sqscale = np.squeeze(scale) - if not sqscale.shape: - # not an array - mantissa, _ = np.frexp(sqscale) + # This optimization only works if all scales are the same + if np.all(scale[0] == scale): + mantissa, _ = np.frexp(scale[0]) scale_unit_or_po2 = mantissa == 0.5 - is_match = is_match and scale_unit_or_po2 + is_match = scale_unit_or_po2 return is_match def transform(self, model, node): - ''' + """ Change quant node to Activation - ''' - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) + """ - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') integer = bitwidth - 
scale = node.get_attr("scale") + scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): - _, exp = np.frexp(np.squeeze(scale)) + _, exp = np.frexp(scale[0]) integer = bitwidth + exp - 1 precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -143,10 +135,9 @@ def transform(self, model, node): class FuseQuantWithConstant(OptimizerPass): - ''' - This is for the case when scale is 1 and zeropt is 0. It directly applies the quantization to a constant. - UPDATE: this is also called when scale is scalar and power of 2, not just 1. - ''' + """ + This is for the case when scale is a positive power of 2 and zeropt is 0. 
+ """ def match(self, node): # only matches after the other inputs are already folded @@ -158,36 +149,35 @@ def match(self, node): and not node.get_input_node(node.inputs[3]) ) - # Only match if the scale is 1s and the zero-point is 0s + # Only match if the scale is power of 2 and the zero-point is 0s if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and (bias == np.zeros_like(bias)).all() # check if scale is ones-like or a power of two scale_unit_or_po2 = (scale == np.ones_like(scale)).all() if not scale_unit_or_po2 and _ALSO_MATCH_PO2: - sqscale = np.squeeze(scale) - if not sqscale.shape: - # not an array - mantissa, _ = np.frexp(sqscale) + # This optimization only works if all scales are the same + if np.all(scale[0] == scale): + mantissa, _ = np.frexp(scale[0]) scale_unit_or_po2 = mantissa == 0.5 - is_match = is_match and scale_unit_or_po2 + is_match = scale_unit_or_po2 return is_match def transform(self, model, node): - ''' + """ Fuse Quant with Constant. 
- ''' + """ - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') integer = bitwidth - scale = node.get_attr("scale") + scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): _, exp = np.frexp(np.squeeze(scale)) integer = bitwidth + exp - 1 @@ -195,11 +185,9 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - const_node.set_attr("quant_precision", precision) - const_node.set_attr("quantizer", quantizer) - - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_node.set_attr('quant_precision', precision) + const_node.set_attr('quantizer', quantizer) + const_node.get_output_variable().type.precision = precision # remove the Quant node model.remove_node(node, rewire=True) @@ -208,12 +196,12 @@ def transform(self, model, node): class QuantToAlphaActivationAlpha(OptimizerPass): - ''' + """ This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activatio, ApplyAlpho (to rescale). - As an optimization, this is not called when the input is constant. 
- ''' + NOTE: It needs to be scheduled after QuantToActivation (or we need to make the match criteria stricter) + """ def match(self, node): # only matches after the other inputs are already folded @@ -224,33 +212,24 @@ def match(self, node): and not node.get_input_node(node.inputs[2]) and not node.get_input_node(node.inputs[3]) ) - - if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") - is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) return is_match def transform(self, model, node): - ''' + """ Change quant node to ApplyAlhpa, Activation, ApplyAlpha - ''' + """ # Do the Activation as in the simple case - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) - - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer, 'n_in': n_in}) + attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -258,27 +237,25 @@ def transform(self, model, node): # but now add the ApplyAlhpas before and after - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') attributes_scale = {k: node.attributes.get(k, None) for k in 
_base_attributes} - attributes_scale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) firstscale = 1 / scale firstbias = bias - attributes_scale["scale_data"] = firstscale - attributes_scale["bias_data"] = firstbias + attributes_scale['scale_data'] = firstscale + attributes_scale['bias_data'] = firstbias scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale rebias = -bias * scale - attributes_rescale["scale_data"] = rescale - attributes_rescale["bias_data"] = rebias + attributes_rescale['scale_data'] = rescale + attributes_rescale['bias_data'] = rebias rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) @@ -287,12 +264,12 @@ def transform(self, model, node): class ConstQuantToConstAlpha(OptimizerPass): - ''' + """ This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input consts allows for optimization, so the ApplyAlpha (to scale), Activation are optimized away right away. 
- ''' + """ def match(self, node): # only matches after the other inputs are already folded @@ -305,39 +282,37 @@ def match(self, node): ) if is_match: # to make sure this is a quant node with inputs - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') is_match = is_match and ((scale != np.ones_like(scale)).any() or (bias != np.zeros_like(bias)).any()) return is_match def transform(self, model, node): - ''' + """ Change Constant + Quant node to Constant, ApplyAlpha - ''' + """ # Do the Activation as in the simple case - input_shape = node.get_input_variable().shape - - n_in = np.prod(input_shape) + n_in = node.get_input_variable().size() - rounding_mode = node.get_attr("rounding_mode") - narrow = node.get_attr("narrow") - signed = node.get_attr("signed") - bitwidth = node.get_attr("bitwidth") + rounding_mode = node.get_attr('rounding_mode') + narrow = node.get_attr('narrow') + signed = node.get_attr('signed') + bitwidth = node.get_attr('bitwidth') precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - scale = node.get_attr("scale") - bias = node.get_attr("zeropt") + scale = node.get_attr('scale') + bias = node.get_attr('zeropt') # caclucate the new value - new_val = const_node.value / scale + bias + new_val = const_node.get_attr('value') / scale + bias const_node.set_attr('value', new_val) - const_node.set_attr("quant_precision", precision) - const_node.set_attr("quantizer", quantizer) + const_node.set_attr('quant_precision', precision) + const_node.set_attr('quantizer', quantizer) # reinitialize (which also runs quantization if quantizer exists) const_node.initialize() @@ -347,8 +322,8 @@ def transform(self, model, node): rescale = scale rebias = -bias * scale - attributes_rescale["scale_data"] = rescale - attributes_rescale["bias_data"] = rebias + attributes_rescale['scale_data'] = 
rescale + attributes_rescale['bias_data'] = rebias rescale_node = model.make_node( ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] @@ -359,25 +334,25 @@ def transform(self, model, node): def _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode): - ''' + """ A function to determine the precision and quantizer - ''' - if rounding_mode == "ROUND": - bn_round = "AP_RND_CONV" - elif rounding_mode == "FLOOR": - bn_round = "AP_TRN" + """ + if rounding_mode == 'ROUND': + bn_round = 'AP_RND_CONV' + elif rounding_mode == 'FLOOR': + bn_round = 'AP_TRN' else: raise NotImplementedError( - f"Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported." + f'Rounding mode {rounding_mode} not supported in Quant node. Only ROUND and FLOOR supported.' ) if narrow and not signed: - raise NotImplementedError("Narrow mode is only supported for singed numbers.") + raise NotImplementedError('Narrow mode is only supported for singed numbers.') if narrow: - bn_sat = "AP_SAT_SYM" + bn_sat = 'AP_SAT_SYM' else: - bn_sat = "AP_SAT" + bn_sat = 'AP_SAT' bitwidth = math.ceil(bitwidth) integer = math.ceil(integer) diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py new file mode 100644 index 0000000000..c0a5869d5b --- /dev/null +++ b/hls4ml/model/quantizers.py @@ -0,0 +1,261 @@ +""" +Quantizer for the Quant node, after scale and zeropoint hafe been extracted +(unless scale is a power of 2, if doing special case po2) + +This is based on the sample implementation in finn-base +""" + +import numpy as np +import tensorflow as tf +from qkeras.quantizers import get_quantizer + +from hls4ml.model.types import ( + ExponentPrecisionType, + FixedPrecisionType, + IntegerPrecisionType, + RoundingMode, + SaturationMode, + XnorPrecisionType, +) + + +class Quantizer: + """ + Base class for representing quantizers in hls4ml. 
+ + Subclasses of ``Quantizer`` are expected to wrap the quantizers of upstream tools (e.g., QKeras). + + Args: + bits (int): Total number of bits used by the quantizer. + hls_type (NamedType): The hls4ml type used by the quantizer. + """ + + def __init__(self, bits, hls_type): + self.bits = bits + self.hls_type = hls_type + + def __call__(self, data): + raise NotImplementedError + + +class BinaryQuantizer(Quantizer): + """Quantizer that quantizes to 0 and 1 (``bits=1``) or -1 and 1 (``bits==2``). + + Args: + bits (int, optional): Number of bits used by the quantizer. Defaults to 2. + + Raises: + Exception: Raised if ``bits>2`` + """ + + def __init__(self, bits=2): + if bits == 1: + hls_type = XnorPrecisionType() + elif bits == 2: + hls_type = IntegerPrecisionType(width=2) + else: + raise Exception(f'BinaryQuantizer suppots 1 or 2 bits, but called with bits={bits}') + super().__init__(bits, hls_type) + + def __call__(self, data): + zeros = np.zeros_like(data) + ones = np.ones_like(data) + quant_data = data + if self.bits == 1: + quant_data = np.where(data > 0, ones, zeros).astype('int') + if self.bits == 2: + quant_data = np.where(data > 0, ones, -ones) + return quant_data + + +class TernaryQuantizer(Quantizer): + """Quantizer that quantizes to -1, 0 and 1.""" + + def __init__(self): + super().__init__(2, IntegerPrecisionType(width=2)) + + def __call__(self, data): + zeros = np.zeros_like(data) + ones = np.ones_like(data) + return np.where(data > 0.5, ones, np.where(data <= -0.5, -ones, zeros)) + + +class QKerasQuantizer(Quantizer): + """Wrapper around QKeras quantizers. + + Args: + config (dict): Config of the QKeras quantizer to wrap. + """ + + def __init__(self, config): + self.quantizer_fn = get_quantizer(config) + self.alpha = config['config'].get('alpha', None) + if config['class_name'] == 'quantized_bits': + self.bits = config['config']['bits'] + self.hls_type = self._get_type(config) + # ! 
includes stochastic_ternary + elif 'ternary' in config['class_name']: + self.bits = 2 + self.hls_type = IntegerPrecisionType(width=2, signed=True) + # ! includes stochastic_binary + elif 'binary' in config['class_name']: + self.bits = 1 + self.hls_type = XnorPrecisionType() + else: + print("Unsupported quantizer: " + config['class_name']) + self.bits = 16 + self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) + + def __call__(self, data): + tf_data = tf.convert_to_tensor(data) + return self.quantizer_fn(tf_data).numpy() + # return self.quantizer_fn(data) + + def _get_type(self, quantizer_config): + width = quantizer_config['config']['bits'] + integer = quantizer_config['config'].get('integer', 0) + if quantizer_config['class_name'] == 'quantized_po2': + return ExponentPrecisionType(width=width, signed=True) + if width == integer: + if width == 1: + return XnorPrecisionType() + else: + return IntegerPrecisionType(width=width, signed=True) + else: + return FixedPrecisionType(width=width, integer=integer + 1, signed=True) + + +class QKerasBinaryQuantizer(Quantizer): + """Wrapper around QKeras binary quantizer. + + Args: + config (dict): Config of the QKeras quantizer to wrap. + """ + + def __init__(self, config, xnor=False): + self.bits = 1 if xnor else 2 + self.hls_type = XnorPrecisionType() if xnor else IntegerPrecisionType(width=2, signed=True) + self.alpha = config['config']['alpha'] + # Use the QKeras quantizer to handle any stochastic / alpha stuff + self.quantizer_fn = get_quantizer(config) + # Then we use our BinaryQuantizer to convert to '0,1' format + self.binary_quantizer = BinaryQuantizer(1) if xnor else BinaryQuantizer(2) + + def __call__(self, data): + x = tf.convert_to_tensor(data) + y = self.quantizer_fn(x).numpy() + return self.binary_quantizer(y) + + +class QKerasPO2Quantizer(Quantizer): + """Wrapper around QKeras power-of-2 quantizers. + + Args: + config (dict): Config of the QKeras quantizer to wrap. 
+ """ + + def __init__(self, config): + self.bits = config['config']['bits'] + self.quantizer_fn = get_quantizer(config) + self.hls_type = ExponentPrecisionType(width=self.bits, signed=True) + + def __call__(self, data): + # Weights are quantized to nearest power of two + x = tf.convert_to_tensor(data) + y = self.quantizer_fn(x) + if hasattr(y, 'numpy'): + y = y.numpy() + return y + + +class QuantNodeQuantizer(Quantizer): + """ + This implements a quantizer for a FixedPrecisionType with width==integer + + This is based on the sample implementation in finn-base + """ + + def __init__(self, precision): + super().__init__(precision.width, precision) + if not isinstance(precision, FixedPrecisionType): + raise TypeError("QuantNodeQuantizer is only defined for FixedPrecisionType") + + def __call__(self, data): + """Apply the quantization on the data""" + + scale = 2 ** (self.hls_type.width - self.hls_type.integer) + + data = data * scale # (not using *= to avoid modifying data) + # Clamping + min_int_val = self._min_int(self.hls_type.signed, self.hls_type.saturation_mode, self.bits) + max_int_val = self._max_int(self.hls_type.signed, self.bits) + data = np.where(data > max_int_val, max_int_val, data) + data = np.where(data < min_int_val, min_int_val, data) + # Rounding + rounding_fx = self._resolve_rounding_mode(self.hls_type.rounding_mode) + return rounding_fx(data) / scale + + @staticmethod + def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: + """Compute the minimum integer representable by a given number of bits. + Args: + signed (bool): Indicates whether the represented integer is signed or not. + saturation_mode (bool): Indicates the saturation mode used (AP_SAT_SYM or AP_SAT) + bit_width (int): Number of bits available for the representation. + Returns: + int: Maximum unsigned integer that can be represented according to + the input arguments. 
+ Examples: + >>> min_int(signed=True, saturation_mode='AP_SAT_SYM', bit_width=8) + int(-127) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + >>> min_int(signed=True, saturation_mode='AP_SAT', bit_width=8) + int(-128) + >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) + int(0) + """ + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): + raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + if signed and saturation_mode == SaturationMode.SAT_SYM: + value = -(2 ** (bit_width - 1)) + 1 + elif signed: + value = -(2 ** (bit_width - 1)) + else: + value = 0 + return value + + @staticmethod + def _max_int(signed: bool, bit_width: int) -> int: + """Compute the maximum integer representable by a given number of bits. + (Note, narrow and unsigned is not supported by the implementation, so saturation mode is not used) + Args: + signed (bool): Indicates whether the represented integer is signed or not. + bit_width (int): Number of bits available for the representation. + Returns: + Tensor: Maximum integer that can be represented according to + the input arguments. 
+ Examples: + >>> max_int(signed=True, bit_width=8) + int(127) + >>> max_int(signed=False, bit_width=8) + int(255) + """ + if not signed: + value = (2**bit_width) - 1 + else: + value = (2 ** (bit_width - 1)) - 1 + return value + + @staticmethod + def _resolve_rounding_mode(mode): + """Resolve the rounding mode of Quant and Trunc ops + to the corresponding numpy functions.""" + if mode == RoundingMode.RND_CONV: + return np.round + # elif mode_string == "CEIL": # not supported + # return np.ceil + elif mode == RoundingMode.TRN: + return np.floor + else: + raise ValueError(f"Rounding mode {mode} not supported.") diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index fc1cd98f19..8c182f4cca 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -8,162 +8,6 @@ from enum import Enum import numpy as np -import tensorflow as tf -from qkeras.quantizers import get_quantizer - -# region Quantizer definition - - -class Quantizer: - """ - Base class for representing quantizers in hls4ml. - - Subclasses of ``Quantizer`` are expected to wrap the quantizers of upstream tools (e.g., QKeras). - - Args: - bits (int): Total number of bits used by the quantizer. - hls_type (NamedType): The hls4ml type used by the quantizer. - """ - - def __init__(self, bits, hls_type): - self.bits = bits - self.hls_type = hls_type - - def __call__(self, data): - raise NotImplementedError - - -class BinaryQuantizer(Quantizer): - """Quantizer that quantizes to 0 and 1 (``bits=1``) or -1 and 1 (``bits==2``). - - Args: - bits (int, optional): Number of bits used by the quantizer. Defaults to 2. 
- - Raises: - Exception: Raised if ``bits>2`` - """ - - def __init__(self, bits=2): - if bits == 1: - hls_type = XnorPrecisionType() - elif bits == 2: - hls_type = IntegerPrecisionType(width=2) - else: - raise Exception(f'BinaryQuantizer suppots 1 or 2 bits, but called with bits={bits}') - super().__init__(bits, hls_type) - - def __call__(self, data): - zeros = np.zeros_like(data) - ones = np.ones_like(data) - quant_data = data - if self.bits == 1: - quant_data = np.where(data > 0, ones, zeros).astype('int') - if self.bits == 2: - quant_data = np.where(data > 0, ones, -ones) - return quant_data - - -class TernaryQuantizer(Quantizer): - """Quantizer that quantizes to -1, 0 and 1.""" - - def __init__(self): - super().__init__(2, IntegerPrecisionType(width=2)) - - def __call__(self, data): - zeros = np.zeros_like(data) - ones = np.ones_like(data) - return np.where(data > 0.5, ones, np.where(data <= -0.5, -ones, zeros)) - - -class QKerasQuantizer(Quantizer): - """Wrapper around QKeras quantizers. - - Args: - config (dict): Config of the QKeras quantizer to wrap. - """ - - def __init__(self, config): - self.quantizer_fn = get_quantizer(config) - self.alpha = config['config'].get('alpha', None) - if config['class_name'] == 'quantized_bits': - self.bits = config['config']['bits'] - self.hls_type = self._get_type(config) - # ! includes stochastic_ternary - elif 'ternary' in config['class_name']: - self.bits = 2 - self.hls_type = IntegerPrecisionType(width=2, signed=True) - # ! 
includes stochastic_binary - elif 'binary' in config['class_name']: - self.bits = 1 - self.hls_type = XnorPrecisionType() - else: - print("Unsupported quantizer: " + config['class_name']) - self.bits = 16 - self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) - - def __call__(self, data): - tf_data = tf.convert_to_tensor(data) - return self.quantizer_fn(tf_data).numpy() - # return self.quantizer_fn(data) - - def _get_type(self, quantizer_config): - width = quantizer_config['config']['bits'] - integer = quantizer_config['config'].get('integer', 0) - if quantizer_config['class_name'] == 'quantized_po2': - return ExponentPrecisionType(width=width, signed=True) - if width == integer: - if width == 1: - return XnorPrecisionType() - else: - return IntegerPrecisionType(width=width, signed=True) - else: - return FixedPrecisionType(width=width, integer=integer + 1, signed=True) - - -class QKerasBinaryQuantizer(Quantizer): - """Wrapper around QKeras binary quantizer. - - Args: - config (dict): Config of the QKeras quantizer to wrap. - """ - - def __init__(self, config, xnor=False): - self.bits = 1 if xnor else 2 - self.hls_type = XnorPrecisionType() if xnor else IntegerPrecisionType(width=2, signed=True) - self.alpha = config['config']['alpha'] - # Use the QKeras quantizer to handle any stochastic / alpha stuff - self.quantizer_fn = get_quantizer(config) - # Then we use our BinaryQuantizer to convert to '0,1' format - self.binary_quantizer = BinaryQuantizer(1) if xnor else BinaryQuantizer(2) - - def __call__(self, data): - x = tf.convert_to_tensor(data) - y = self.quantizer_fn(x).numpy() - return self.binary_quantizer(y) - - -class QKerasPO2Quantizer(Quantizer): - """Wrapper around QKeras power-of-2 quantizers. - - Args: - config (dict): Config of the QKeras quantizer to wrap. 
- """ - - def __init__(self, config): - self.bits = config['config']['bits'] - self.quantizer_fn = get_quantizer(config) - self.hls_type = ExponentPrecisionType(width=self.bits, signed=True) - - def __call__(self, data): - # Weights are quantized to nearest power of two - x = tf.convert_to_tensor(data) - y = self.quantizer_fn(x) - if hasattr(y, 'numpy'): - y = y.numpy() - return y - - -# endregion # region Precision types diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 535bffb0da..2c314c13ca 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -88,7 +88,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model ishape = (1, 1, 28, 28) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -111,7 +111,7 @@ def test_cnv_2w2a(cnv_2w2a_model, backend): model = cnv_2w2a_model ishape = (1, 32, 32, 3) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -137,7 +137,7 @@ def test_jet_tagging(jettagging_model, backend): # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - X = np.random.uniform(low=-1, high=+1, size=np.product(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] From 4d529756337961228216dc788aa1f8f79eb76cb3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 5 Feb 2024 
14:34:21 -0600 Subject: [PATCH 15/62] snapshot before removing redundant precision attributes --- .../model/optimizer/passes/conv_to_convxd.py | 25 +++--- .../optimizer/passes/matmul_const_to_dense.py | 24 +++--- hls4ml/model/optimizer/passes/merge_const.py | 79 ++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 45 +++++------ 4 files changed, 85 insertions(+), 88 deletions(-) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 28f4d4c0bd..efc5f3e89b 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -48,12 +48,13 @@ def transform(self, model, node): """Convert Conv with constant to a Conv1D or Conv2D layer""" weight_node = node.get_input_node(node.inputs[1]) - weight_precision = weight_node.get_attr("quant_precision") + weight_precision = weight_node.get_attr('quant_precision') + weight_data = weight_node.attributes['value'] bias_node = None bias_precision = None if len(node.inputs) == 3: bias_node = node.get_input_node(node.inputs[2]) - bias_precision = bias_node.get_attr("quant_precision") + bias_precision = bias_node.get_attr('quant_precision') # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} @@ -61,24 +62,24 @@ def transform(self, model, node): # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) if node.attributes['n_dim'] == 1: newtype = Conv1D - attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 0)) + attributes['weight_data'] = np.transpose(weight_data, (1, 2, 0)) else: newtype = Conv2D - attributes["weight_data"] = np.transpose(weight_node.value, (1, 2, 3, 0)) - attributes["weight_precision"] = weight_precision - attributes["weight_quantizer"] = weight_node.get_attr("quantizer") + attributes['weight_data'] = np.transpose(weight_data, (1, 2, 3, 0)) + attributes['weight_precision'] = weight_precision + 
attributes['weight_quantizer'] = weight_node.get_attr('quantizer') if bias_node: - attributes["bias_data"] = bias_node.value - attributes["bias_precision"] = bias_precision - attributes["bias_quantizer"] = bias_node.get_attr("quantizer") + attributes['bias_data'] = bias_node.attributes['value'] + attributes['bias_precision'] = bias_precision + attributes['bias_quantizer'] = bias_node.get_attr('quantizer') else: - attributes["bias_data"] = np.zeros(attributes['n_filt']) - attributes["bias_precision"] = IntegerPrecisionType(1, False) + attributes['bias_data'] = np.zeros(attributes['n_filt']) + attributes['bias_precision'] = IntegerPrecisionType(1, False) # making new node new_node = model.make_node( - newtype, f"{newtype.__name__}_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs] + newtype, f'{newtype.__name__}_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs] ) # removing and replacing old nodes diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 82c7b56313..2a89ea0130 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -26,30 +26,32 @@ def transform(self, model, node): const_node = node.get_input_node(node.inputs[1]) other_var = node.get_input_variable(node.inputs[0]) - weight_precision = const_node.get_attr("quant_precision") - weight_quantizer = const_node.get_attr("quantizer") + weight_data = const_node.attributes['value'] + weight_precision = const_node.get_attr('quant_precision') + weight_quantizer = const_node.get_attr('quantizer') in_shape = other_var.shape n_in = np.prod(in_shape) - out_shape = list(in_shape[:-1]) + [const_node.value.shape[-1]] + out_shape = list(in_shape[:-1]) + [weight_data.shape[-1]] n_out = np.prod(out_shape) # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { - 
"weight_data": const_node.value, - "weight_precision": weight_precision, - "weight_quantizer": weight_quantizer, - "bias_data": np.zeros(out_shape), - "bias_precision": IntegerPrecisionType(1, False), - "n_in": n_in, - "n_out": n_out, + 'weight_data': weight_data, + 'weight_precision': weight_precision, + 'weight_quantizer': weight_quantizer, + 'bias_data': np.zeros(out_shape), + 'bias_precision': IntegerPrecisionType(1, False), + 'have_bias': False, + 'n_in': n_in, + 'n_out': n_out, } ) # making new node - new_dense = model.make_node(Dense, f"Dense_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + new_dense = model.make_node(Dense, f'Dense_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes model.remove_node(const_node, rewire=False) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index adc7dff093..4b13982259 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -6,8 +6,6 @@ _base_attributes = ('Trace', 'reuse_factor', 'n_in') -# TODO This doesn't yet support quantization in the constants - class MergeTwoConstants(OptimizerPass): """Merge of two constants makes another constant""" @@ -23,15 +21,18 @@ def match(self, node): def transform(self, model, node): """ - Merge of two constants makes another constant + Merge of two constants makes another constant. + + Note: full precision is used in the calculation, and precision is not propagated. 
+ The precision """ const_node0 = node.get_input_node(node.inputs[0]) const_node1 = node.get_input_node(node.inputs[1]) - val0 = const_node0.value - val1 = const_node1.value + val0 = const_node0.attributes['value'] + val1 = const_node1.attributes['value'] - op = node.attributes["op"] + op = node.attributes['op'] if op in ('add', 'sum'): new_val = val0 + val1 elif op == 'sub': @@ -47,16 +48,18 @@ def transform(self, model, node): elif op == 'min': new_val = np.minimum(val0, val1) else: - raise RuntimeError(f"Unexpected op_type: {op}") + raise RuntimeError(f'Unexpected op_type: {op}') - quantizer = node.get_attr("quantizer") # None if not defined + quantizer = node.get_attr('quantizer') # None if not defined + const_node0.set_attr('quantizer', quantizer) # overwrite the quantizer if quantizer: - const_node0.set_attr("quantizer", quantizer) - const_node0.set_attr("value", new_val) + const_node0.set_attr('quantizer', quantizer) + + const_node0.set_attr('value', new_val) - quant_precision = node.get_attr("quant_precision") + quant_precision = node.get_attr('quant_precision') if quant_precision: - const_node0.set_attr("quant_precision", quant_precision) + const_node0.set_attr('quant_precision', quant_precision) # reinitialize (which also runs quantization if quantizer exists) const_node0.initialize() @@ -75,7 +78,7 @@ class MergeToApplyAlpha(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes["op"] in ("add", "sum", "sub", "mul") # Div is separate + and node.attributes['op'] in ('add', 'sum', 'sub', 'mul') # Div is separate and ( isinstance(node.get_input_node(node.inputs[0]), Constant) != isinstance(node.get_input_node(node.inputs[1]), Constant) @@ -103,21 +106,21 @@ def transform(self, model, node): bias_precision = None bias_quantizer = None - op = node.attributes["op"] + op = node.attributes['op'] if op in ('add', 'sum'): scale = np.array(1) - bias = const_node.value - bias_precision = 
const_node.get_attr("quant_precision") - bias_quantizer = const_node.get_attr("quantizer") + bias = const_node.attribute['value'] + bias_precision = const_node.get_attr('quant_precision') + bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': if node1const: scale = np.array(1) - bias = -const_node.value + bias = -const_node.attribute['value'] else: scale = np.array(-1) - bias = const_node.value - bias_precision = const_node.get_attr("quant_precision") - bias_quantizer = const_node.get_attr("quantizer") + bias = const_node.attribute['value'] + bias_precision = const_node.get_attr('quant_precision') + bias_quantizer = const_node.get_attr('quantizer') if bias_precision and not bias_precision.signed: # need to add a bit bias_precision.signed = 1 @@ -126,10 +129,10 @@ def transform(self, model, node): bias_quantizer = QuantNodeQuantizer(bias_precision) elif op == 'mul': - scale = const_node.value + scale = const_node.attribute['value'] bias = np.array(0) - scale_precision = const_node.get_attr("quant_precision") - scale_quantizer = const_node.get_attr("quantizer") + scale_precision = const_node.get_attr('quant_precision') + scale_quantizer = const_node.get_attr('quantizer') # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -140,20 +143,20 @@ def transform(self, model, node): attributes = {k: node.attributes.get(k, None) for k in _base_attributes} attributes.update( { - "scale_data": scale, - "bias_data": bias, - "n_in": n_in, - "n_out": n_in, - "n_filt": -1, - "scale_precision": scale_precision, - "scale_quantizer": scale_quantizer, - "bias_precision": bias_precision, - "bias_quantizer": bias_quantizer, + 'scale_data': scale, + 'bias_data': bias, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + 'scale_precision': scale_precision, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 
'bias_quantizer': bias_quantizer, } ) bn_layer = model.make_node( - ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -172,7 +175,7 @@ class MergeToApplyAlphaDiv(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes["op"] == 'div' + and node.attributes['op'] == 'div' and isinstance(node.get_input_node(node.inputs[1]), Constant) ) # only second can be const @@ -182,7 +185,7 @@ def transform(self, model, node): input_shape = node.get_input_variable().shape n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) - scale = 1 / const_node.value + scale = 1 / const_node.attribute['value'] bias = np.array(0) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias @@ -192,9 +195,9 @@ def transform(self, model, node): bias = np.broadcast_to(bias, input_shape) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({"scale_data": scale, "bias_data": bias, "n_in": n_in, "n_out": n_in, "n_filt": -1}) + attributes.update({'scale_data': scale, 'bias_data': bias, 'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) - bn_layer = model.make_node(ApplyAlpha, f"bn_{node.name}", attributes, [node.inputs[0]], [x for x in node.outputs]) + bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) model.replace_node(node, bn_layer) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index dc6deab14b..e49ff99bd7 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -31,10 +31,14 @@ class QuantConstantParameters(OptimizerPass): """Remove Constant from the Qaunt 
node parameters (but not input[0])""" def match(self, node): - is_match = isinstance(node, Quant) and ( - (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant)) - or (node.get_input_node(node.inputs[2]) and isinstance(node.get_input_node(node.inputs[2]), Constant)) - or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant)) + is_match = ( + isinstance(node, Quant) + and len(node.inputs) == 4 + and ( + (node.get_input_node(node.inputs[1]) and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or (node.get_input_node(node.inputs[2]) and isinstance(node.get_input_node(node.inputs[2]), Constant)) + or (node.get_input_node(node.inputs[3]) and isinstance(node.get_input_node(node.inputs[3]), Constant)) + ) ) return is_match @@ -67,6 +71,10 @@ def transform(self, model, node): node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) + node.inputs = [inp for inp in node.inputs if inp] + if len(node.inputs) != 1: + raise RuntimeError("hls4ml only supports constant scale, zeropt, and bitwidth values") + return True @@ -83,10 +91,8 @@ def match(self, node): is_match = ( isinstance(node, Quant) + and len(node.inputs) == 1 and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) ) # Only match if the scale is power of 2 and the zero-point is 0s @@ -142,11 +148,7 @@ class FuseQuantWithConstant(OptimizerPass): def match(self, node): # only matches after the other inputs are already folded is_match = ( - isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) + isinstance(node, Quant) and len(node.inputs) == 1 and isinstance(node.get_input_node(node.inputs[0]), 
Constant) ) # Only match if the scale is power of 2 and the zero-point is 0s @@ -197,7 +199,7 @@ def transform(self, model, node): class QuantToAlphaActivationAlpha(OptimizerPass): """ - This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + This is for the case when scale is not power-of-2 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activatio, ApplyAlpho (to rescale). NOTE: It needs to be scheduled after QuantToActivation (or we need to make the match criteria stricter) @@ -207,10 +209,8 @@ def match(self, node): # only matches after the other inputs are already folded is_match = ( isinstance(node, Quant) + and len(node.inputs) == 1 and not isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) ) return is_match @@ -265,7 +265,7 @@ def transform(self, model, node): class ConstQuantToConstAlpha(OptimizerPass): """ - This is for the case when scale is not 1 or zeropt is not 0. It is a a 1:3 transformation of + This is for the case when scale is not power-of-2 or zeropt is not 0. It is a a 1:3 transformation of a Quant to an ApplyAlpha (to scale), Activation, ApplyAlpho (to unscale), but an input consts allows for optimization, so the ApplyAlpha (to scale), Activation are optimized away right away. 
@@ -274,11 +274,7 @@ class ConstQuantToConstAlpha(OptimizerPass): def match(self, node): # only matches after the other inputs are already folded is_match = ( - isinstance(node, Quant) - and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[1]) - and not node.get_input_node(node.inputs[2]) - and not node.get_input_node(node.inputs[3]) + isinstance(node, Quant) and len(node.inputs) == 1 and isinstance(node.get_input_node(node.inputs[0]), Constant) ) if is_match: # to make sure this is a quant node with inputs @@ -292,10 +288,6 @@ def transform(self, model, node): Change Constant + Quant node to Constant, ApplyAlpha """ - # Do the Activation as in the simple case - - n_in = node.get_input_variable().size() - rounding_mode = node.get_attr('rounding_mode') narrow = node.get_attr('narrow') signed = node.get_attr('signed') @@ -318,7 +310,6 @@ def transform(self, model, node): const_node.initialize() attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes_rescale.update({'n_in': n_in, 'n_out': n_in, 'n_filt': -1}) rescale = scale rebias = -bias * scale From cf5c9a105f27ffe3d2a81269c5664565e3362ffd Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 7 Feb 2024 10:24:33 -0600 Subject: [PATCH 16/62] snapshot --- hls4ml/model/layers.py | 24 ++- .../model/optimizer/passes/batchnorm_opt.py | 150 +++++++++++++----- hls4ml/model/optimizer/passes/bn_fuse.py | 41 ++++- .../model/optimizer/passes/conv_to_convxd.py | 10 +- .../optimizer/passes/matmul_const_to_dense.py | 5 +- hls4ml/model/optimizer/passes/merge_const.py | 70 +++++--- hls4ml/model/optimizer/passes/quant_opt.py | 6 +- hls4ml/model/quantizers.py | 12 +- hls4ml/model/types.py | 23 ++- 9 files changed, 248 insertions(+), 93 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 7da730b60a..b5d9f492af 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -22,6 +22,7 @@ IntegerPrecisionType, 
NamedType, TensorVariable, + UnspecifiedPrecisionType, WeightVariable, find_minimum_width, ) @@ -361,7 +362,12 @@ def initialize(self): shape = (1,) self.set_attr('value', np.array([value])) dims = [f'{self.name}_{i}' for i in range(len(shape))] - self.add_output_variable(shape, dims, var_name=self.name, precision=self.get_attr("precision")) + quantizer = self.get_attr('quantizer') + + # Should the else clause below be None or UnspecifiedPrecisionType + precision = quantizer.hls_type if quantizer is not None else UnspecifiedPrecisionType() + + self.add_output_variable(shape, dims, var_name=self.name, precision=precision) class Quant(Layer): # The QONNX quantization layer @@ -901,6 +907,7 @@ def initialize(self): self.add_output_variable(shape, dims) +# TODO: We currently seem to ignore the quantizers to mean, variance, etc. class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), @@ -943,19 +950,22 @@ def initialize(self): self.add_output_variable(shape, dims) self.set_attr('n_in', inp.size()) + # precision values are ignored if quantizer is not None scale = self.get_attr('scale_data') scale_quantizer = self.get_attr('scale_quantizer') + scale_precision = self.get_attr('scale_precision') bias = self.get_attr('bias_data') bias_quantizer = self.get_attr('bias_quantizer') + bias_precision = self.get_attr('bias_precision') - self.add_weights(scale, quantizer=scale_quantizer) - self.add_bias(bias, quantizer=bias_quantizer) + self.add_weights(scale, quantizer=scale_quantizer, precision=scale_precision) + self.add_bias(bias, quantizer=bias_quantizer, precision=bias_precision) - def add_weights(self, scale, quantizer=None): - self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer) + def add_weights(self, scale, quantizer=None, precision=None): + self.add_weights_variable(name='scale', var_name='s{index}', data=scale, quantizer=quantizer, precision=precision) - def add_bias(self, bias, quantizer=None): - 
self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer) + def add_bias(self, bias, quantizer=None, precision=None): + self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer, precision=precision) class Merge(Layer): diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index a74047676d..3e0984dccb 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,7 +1,9 @@ import numpy as np -from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant +from hls4ml.model.layers import ApplyAlpha, BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') @@ -17,49 +19,55 @@ def match(self, node): def transform(self, model, node): """ Remove Constant from the BatchNormalization node parameters (but not input[0]) + + TODO: Currently the quantizers are not actually used by the underlying layer. 
""" if not (len(node.inputs) == 5 and all(node.inputs)): - raise ValueError(f"All {len.node.inputs} BatchNormOnnnx inputs need to be defined") + raise ValueError(f'All {len.node.inputs} BatchNormOnnnx inputs need to be defined') attributes = {k: node.attributes.get(k, None) for k in _base_attributes} gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): - raise TypeError("Only consant gammas supported") - gamma = gamma_node.value + raise TypeError('Only consant gammas supported') + gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma + attributes['gamma_quantizer'] = gamma_node.get_attr['quantizer'] + node.inputs[1] = '' model.remove_node(gamma_node, rewire=False) beta_node = node.get_input_node(node.inputs[2]) if not isinstance(beta_node, Constant): - raise TypeError("Only consant betas supported") - beta = beta_node.value + raise TypeError('Only consant betas supported') + beta = beta_node.attributes['value'] attributes['beta_data'] = beta + attributes['beta_quantizer'] = beta_node.get_attr['quantizer'] node.inputs[2] = '' model.remove_node(beta_node, rewire=False) moving_mean_node = node.get_input_node(node.inputs[3]) if not isinstance(moving_mean_node, Constant): - raise TypeError("Only consant moving_means supported") - moving_mean = moving_mean_node.value + raise TypeError('Only consant moving_means supported') + moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean + attributes['mean_quantizer'] = moving_mean_node.get_attr['quantizer'] node.inputs[3] = '' model.remove_node(moving_mean_node, rewire=False) moving_variance_node = node.get_input_node(node.inputs[4]) if not isinstance(moving_variance_node, Constant): - raise TypeError("Only consant moving_variances supported") - moving_variance = moving_variance_node.value + raise TypeError('Only consant moving_variances supported') + moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = 
moving_variance + attributes['variance_quantizer'] = moving_variance_node.get_attr['quantizer'] node.inputs[4] = '' model.remove_node(moving_variance_node, rewire=False) - # scale = gamma / np.sqrt(moving_variance + node.get_attr('epsilon')) - # bias = beta - gamma * moving_mean / np.sqrt(moving_variance + node.get_attr('epsilon')) - # attributes["scale_data"] = scale - # attributes["bias_data"] = bias + node.inputs = [inp for inp in node.inputs if inp] + if len(node.inputs) != 1: + raise RuntimeError('The QONNX batchnomr had unexpected inputs.') new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) @@ -78,7 +86,6 @@ def match(self, node): isinstance(node, BatchNormalization) and not any(node.inputs[1:]) and isinstance(node.get_input_node(node.inputs[0]), Constant) - and not node.get_input_node(node.inputs[0]).get_attr("quant_precision") ) return is_match @@ -88,13 +95,48 @@ def transform(self, model, node): """ const_node = node.get_input_node(node.inputs[0]) - new_val = const_node.value * node.weights["scale"].data_unquantized + node.weights["bias"].data_unquantized - const_node.set_attr("value", new_val) - const_node.set_attr("quantizer", node.get_attr("quantizer")) # None if not defined - const_node.set_attr("quant_precision", node.get_attr("quant_precision")) - - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_prec = const_node.get_output_variable().type.precision + + new_val = const_node.value * node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized + + const_node.set_attr('value', new_val) + const_node.set_attr('quantizer', node.get_attr('quantizer')) # None if not defined + + if isinstance(node.get_output_variable().type.precision, UnspecifiedPrecisionType): + if isinstance(const_prec, UnspecifiedPrecisionType): + pass # leave it as is + else: + const_node.get_output_variable().type.precision = UnspecifiedPrecisionType() # 
default + # propagate precision + scale_q = node.get_attr('scale_quantizer') + bias_q = node.get_attr('bias_quantizer') + if scale_q and bias_q: + # propagate precsion + scale_prec = scale_q.hls_type + bias_prec = bias_q.hls_type + if scale_prec not in (IntegerPrecisionType, FixedPrecisionType) or bias_prec not in ( + IntegerPrecisionType, + FixedPrecisionType, + ): + print("Warning: output type not propagated for constant merge") + else: + signed_prod = const_prec.signed or scale_prec.signed + w_prod = const_prec.width + scale_prec.width + i_prod = const_prec.integer + scale_prec.integer + signed = signed_prod or bias_prec.signed + i_tot = ( + max( + i_prod + (bias_prec.signed and not signed_prod), + bias_prec.ingeter + (signed_prod and not bias_prec.signed), + ) + + 1 + ) + w_tot = i_tot + max(w_prod - i_prod, bias_prec.width - bias_prec.integer) + new_prec = FixedPrecisionType(w_tot, i_tot, signed) + const_node.set_attr('quantizer', QuantNodeQuantizer(new_prec)) + const_node.get_output_variable().type.precision = new_prec + else: + const_node.get_output_variable().type.precision = node.get_output_variable().type.precision # remove the batch norm node model.remove_node(node, rewire=True) @@ -103,17 +145,21 @@ def transform(self, model, node): class FuseConsecutiveBatchNormalization(OptimizerPass): - ''' + """ OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have quantization specified - ''' + + Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + + Note: This optimizer may not be safe if weights are updateable. May need to turn off. 
+ """ def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( - isinstance(node, BatchNormalization) - and isinstance(prev_node, BatchNormalization) - and not prev_node.get_attr("quant_precision") + isinstance(node, ApplyAlpha) + and isinstance(prev_node, ApplyAlpha) + and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) # check for compatibility to merge @@ -123,12 +169,12 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr("scale_quantizer") is None and node.get_attr("scale_quantizer") is None) + (prev_node.get_attr('scale_quantizer') is None and node.get_attr('scale_quantizer') is None) or (s0 == np.ones_like(s0)).all() or (s1 == np.ones_like(s1)).all() ) bias_compatible = ( - (prev_node.get_attr("bias_quantizer") is None and node.get_attr("bias_quantizer") is None) + (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) or (b0 == np.zeros_like(b0)).all() or (b1 == np.zeros_like(b1)).all() ) @@ -139,31 +185,57 @@ def match(self, node): def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) + prev_map = prev_node.get_output_use_map() + if len(prev_map[prev_node.outputs[0]]) > 1: + return False + + # # Not sure why this part is needed + # node_map = node.get_output_use_map() + # if len(node_map[node.outputs[0]]) > 1: + # return False + s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized s_quantizer = ( - node.get_attr("scale_quantizer") if (s0 == np.ones_like(s0)).all() else prev_node.get_attr("scale_quantizer") + node.get_attr('scale_quantizer') if (s0 == np.ones_like(s0)).all() else prev_node.get_attr('scale_quantizer') ) b_quantizer = ( - node.get_attr("bias_quantizer") if (b0 == 
np.zeros_like(b0)).all() else prev_node.get_attr("bias_quantizer") + node.get_attr('bias_quantizer') if (b0 == np.zeros_like(b0)).all() else prev_node.get_attr('bias_quantizer') ) - node.set_attr("scale_quantizer", s_quantizer) - node.set_attr("bias_quantizer", b_quantizer) - if s_quantizer: - node.set_attr("scale_precision", s_quantizer.hls_type) - if b_quantizer: - node.set_attr("bias_precision", b_quantizer.hls_type) + node.set_attr('scale_quantizer', s_quantizer) + node.set_attr('bias_quantizer', b_quantizer) scale_new = s0 * s1 bias_new = s1 * b0 + b1 + # Not sure if this setting of this is useful + s_prec = None + if s_quantizer is None and (scale_new == np.ones_like(scale_new)).all(): + if ( + isinstance(prev_node.weights['scale'].type, IntegerPrecisionType) + and isinstance(node.weights['scale'].type, IntegerPrecisionType) + and prev_node.weights['scale'].type.width == 1 + and node.weights['scale'].type.width == 1 + ): + s_prec = node.weights['scale'].type + + b_prec = None + if b_quantizer is None and (bias_new == np.zeros_like(bias_new)).all(): + if ( + isinstance(prev_node.weights['bias'].type, IntegerPrecisionType) + and isinstance(node.weights['bias'].type, IntegerPrecisionType) + and prev_node.weights['bias'].type.width == 1 + and node.weights['bias'].type.width == 1 + ): + b_prec = node.weights['bias'].type + # call function so that quantizer would be called if needed - node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new) - node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new) + node.add_weights_variable(name='scale', var_name='s{index}', data=scale_new, quantizer=s_quantizer, precision=s_prec) + node.add_weights_variable(name='bias', var_name='b{index}', data=bias_new, quantizer=b_quantizer, precision=b_prec) model.remove_node(prev_node, rewire=True) return True diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 02d9b849ed..3d79de7dc8 100644 --- 
a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -1,25 +1,50 @@ +import numpy as np + from hls4ml.model.layers import BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import UnspecifiedPrecisionType class FuseBatchNormalization(OptimizerPass): def match(self, node): - is_match = ( + prev_node = node.get_input_node(node.inputs[0]) + basic_match = ( isinstance(node, BatchNormalization) - and isinstance(node.get_input_node(), (Dense, Conv1D, Conv2D)) - and node.get_input_node().get_attr('weight_quantizer') is None - and node.get_input_node().get_attr('bias_quantizer') is None + and isinstance(prev_node, (Dense, Conv1D, Conv2D)) + and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) - return is_match + if basic_match: + s0 = prev_node.weights['weight'].data_unquantized + b0 = prev_node.weights['bias'].data_unquantized + s1 = node.weights['scale'].data_unquantized + b1 = node.weights['bias'].data_unquantized + scale_compatible = ( + (prev_node.get_attr('weight_quantizer') is None and node.get_attr('scale_quantizer') is None) + or (s0 == np.ones_like(s0)).all() + or (s1 == np.ones_like(s1)).all() + ) + bias_compatible = ( + (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) + or (b0 == np.zeros_like(b0)).all() + or (b1 == np.zeros_like(b1)).all() + ) + return scale_compatible and bias_compatible + + else: + return False def transform(self, model, node): - # Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values + """Fuse weight and bias of Dense/Conv1D/Conv2D layer with BN values.""" parent_node = node.get_input_node() parent_map = parent_node.get_output_use_map() - node_map = node.get_output_use_map() - if len(parent_map[parent_node.name]) > 1 or len(node_map[node.name]) > 1: + if len(parent_map[parent_node.outputs[0]]) > 1: return False + # # Not sure why this part is 
needed + # node_map = node.get_output_use_map() + # if len(node_map[node.outputs[0]]) > 1: + # return False + parent_weight = parent_node.weights['weight'] parent_bias = parent_node.weights['bias'] diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index efc5f3e89b..b61b0340be 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -2,6 +2,7 @@ from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import IntegerPrecisionType # these are attributes to copy @@ -48,13 +49,10 @@ def transform(self, model, node): """Convert Conv with constant to a Conv1D or Conv2D layer""" weight_node = node.get_input_node(node.inputs[1]) - weight_precision = weight_node.get_attr('quant_precision') weight_data = weight_node.attributes['value'] bias_node = None - bias_precision = None if len(node.inputs) == 3: bias_node = node.get_input_node(node.inputs[2]) - bias_precision = bias_node.get_attr('quant_precision') # creating the attributes attributes = {k: node.attributes.get(k, None) for k in _base_attributes} @@ -66,16 +64,16 @@ def transform(self, model, node): else: newtype = Conv2D attributes['weight_data'] = np.transpose(weight_data, (1, 2, 3, 0)) - attributes['weight_precision'] = weight_precision attributes['weight_quantizer'] = weight_node.get_attr('quantizer') if bias_node: attributes['bias_data'] = bias_node.attributes['value'] - attributes['bias_precision'] = bias_precision attributes['bias_quantizer'] = bias_node.get_attr('quantizer') + attributes['have_bias'] = True else: attributes['bias_data'] = np.zeros(attributes['n_filt']) - attributes['bias_precision'] = IntegerPrecisionType(1, False) + attributes['bias_quantizer'] = QuantNodeQuantizer(IntegerPrecisionType(1, False)) + attributes['have_bias'] = False # making new 
node new_node = model.make_node( diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 2a89ea0130..7eac0ccca3 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -2,6 +2,7 @@ from hls4ml.model.layers import Constant, Dense, MatMul from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import IntegerPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') @@ -27,7 +28,6 @@ def transform(self, model, node): other_var = node.get_input_variable(node.inputs[0]) weight_data = const_node.attributes['value'] - weight_precision = const_node.get_attr('quant_precision') weight_quantizer = const_node.get_attr('quantizer') in_shape = other_var.shape @@ -40,10 +40,9 @@ def transform(self, model, node): attributes.update( { 'weight_data': weight_data, - 'weight_precision': weight_precision, 'weight_quantizer': weight_quantizer, 'bias_data': np.zeros(out_shape), - 'bias_precision': IntegerPrecisionType(1, False), + 'bias_quantizer': QuantNodeQuantizer(IntegerPrecisionType(1, False)), 'have_bias': False, 'n_in': n_in, 'n_out': n_out, diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 4b13982259..11848c9081 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -3,6 +3,7 @@ from hls4ml.model.layers import ApplyAlpha, Constant, Merge from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.quantizers import QuantNodeQuantizer +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType _base_attributes = ('Trace', 'reuse_factor', 'n_in') @@ -57,10 +58,6 @@ def transform(self, model, node): const_node0.set_attr('value', new_val) - quant_precision = node.get_attr('quant_precision') 
- if quant_precision: - const_node0.set_attr('quant_precision', quant_precision) - # reinitialize (which also runs quantization if quantizer exists) const_node0.initialize() @@ -101,6 +98,7 @@ def transform(self, model, node): input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape n_in = np.prod(input_shape) + # Note: precision is ignored if quantizer is not None scale_precision = None scale_quantizer = None bias_precision = None @@ -109,30 +107,40 @@ def transform(self, model, node): op = node.attributes['op'] if op in ('add', 'sum'): scale = np.array(1) + scale_precision = IntegerPrecisionType(1, False) bias = const_node.attribute['value'] - bias_precision = const_node.get_attr('quant_precision') bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': + bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) + scale_precision = IntegerPrecisionType(1, False) bias = -const_node.attribute['value'] + if ( + bias_quantizer is not None + and isinstance(bias_quantizer.hls_type, (IntegerPrecisionType, FixedPrecisionType)) + and not bias_quantizer.hls_type.signed + ): + # need to make signed and increas the bit, if unsigned + bias_precision = FixedPrecisionType( + bias_quantizer.hls_type.width + 1, + bias_quantizer.hls_type.integer + 1, + True, + bias_quantizer.hls_type.rounding_mode, + bias_quantizer.hls_type.saturation_mode, + bias_quantizer.hls_type.saturation_bits, + ) + bias_quantizer = QuantNodeQuantizer(bias_precision) else: scale = np.array(-1) + scale_precision = IntegerPrecisionType(2, True) bias = const_node.attribute['value'] - bias_precision = const_node.get_attr('quant_precision') - bias_quantizer = const_node.get_attr('quantizer') - if bias_precision and not bias_precision.signed: - # need to add a bit - bias_precision.signed = 1 - bias_precision.width += 1 - bias_precision.integer += 1 - bias_quantizer = QuantNodeQuantizer(bias_precision) elif op == 'mul': scale = const_node.attribute['value'] - 
bias = np.array(0) - scale_precision = const_node.get_attr('quant_precision') scale_quantizer = const_node.get_attr('quantizer') + bias = np.array(0) + bias_precision = IntegerPrecisionType(1, False) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -155,12 +163,12 @@ def transform(self, model, node): } ) - bn_layer = model.make_node( + aa_layer = model.make_node( ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) - model.replace_node(node, bn_layer) + model.replace_node(node, aa_layer) return True @@ -186,7 +194,23 @@ def transform(self, model, node): n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) scale = 1 / const_node.attribute['value'] + scale_quantizer = const_node.get_attr('quantizer') + if scale_quantizer: + scale_precision = scale_quantizer.hls_type + i_new = 1 + int(scale_precision.signed) + scale_precision.fractional + w_new = 1 + int(scale_precision.signed) + max(scale_precision.fractional, 0) + new_scale_precision = FixedPrecisionType( + w_new, + i_new, + scale_precision.signed, + rounding_mode=scale_precision.rounding_mode, + saturation_mode=scale_precision.saturation_mode, + saturation_bits=scale_precision.saturation_bits, + ) + scale_quantizer = QuantNodeQuantizer(new_scale_precision) + bias = np.array(0) + bias_precision = IntegerPrecisionType(1, False) # because C++ doesn't do broadcasting, we may have to change the shapes of the scale and bias if scale.shape != tuple(input_shape) and np.squeeze(scale).shape != tuple(input_shape): @@ -195,7 +219,17 @@ def transform(self, model, node): bias = np.broadcast_to(bias, input_shape) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'scale_data': scale, 'bias_data': bias, 'n_in': n_in, 'n_out': n_in, 
'n_filt': -1}) + attributes.update( + { + 'scale_data': scale, + 'bias_data': bias, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + } + ) bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index e49ff99bd7..79d92ec4d1 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -131,7 +131,7 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) + attributes.update({'activation': 'linear', 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -187,7 +187,6 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) const_node = node.get_input_node(node.inputs[0]) - const_node.set_attr('quant_precision', precision) const_node.set_attr('quantizer', quantizer) const_node.get_output_variable().type.precision = precision @@ -229,7 +228,7 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quant_precision': precision, 'quantizer': quantizer}) + attributes.update({'activation': 'linear', 'quantizer': quantizer}) new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x 
for x in node.outputs]) new_node.get_output_variable().type.precision = precision @@ -303,7 +302,6 @@ def transform(self, model, node): # caclucate the new value new_val = const_node.get_attr('value') / scale + bias const_node.set_attr('value', new_val) - const_node.set_attr('quant_precision', precision) const_node.set_attr('quantizer', quantizer) # reinitialize (which also runs quantization if quantizer exists) diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py index c0a5869d5b..cadcdbbc3d 100644 --- a/hls4ml/model/quantizers.py +++ b/hls4ml/model/quantizers.py @@ -102,7 +102,7 @@ def __init__(self, config): self.bits = 1 self.hls_type = XnorPrecisionType() else: - print("Unsupported quantizer: " + config['class_name']) + print('Unsupported quantizer: ' + config['class_name']) self.bits = 16 self.hls_type = FixedPrecisionType(width=16, integer=6, signed=True) @@ -177,8 +177,8 @@ class QuantNodeQuantizer(Quantizer): def __init__(self, precision): super().__init__(precision.width, precision) - if not isinstance(precision, FixedPrecisionType): - raise TypeError("QuantNodeQuantizer is only defined for FixedPrecisionType") + if not isinstance(precision, (FixedPrecisionType, IntegerPrecisionType)): + raise TypeError('QuantNodeQuantizer is only defined for FixedPrecisionType and IntegerPrecisionType') def __call__(self, data): """Apply the quantization on the data""" @@ -216,7 +216,7 @@ def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: int(0) """ if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): - raise ValueError(f"Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported") + raise ValueError(f'Saturation mode {saturation_mode} not supported. 
Only AP_SAT_SYM, AP_SAT supported') if signed and saturation_mode == SaturationMode.SAT_SYM: value = -(2 ** (bit_width - 1)) + 1 elif signed: @@ -253,9 +253,9 @@ def _resolve_rounding_mode(mode): to the corresponding numpy functions.""" if mode == RoundingMode.RND_CONV: return np.round - # elif mode_string == "CEIL": # not supported + # elif mode_string == 'CEIL': # not supported # return np.ceil elif mode == RoundingMode.TRN: return np.floor else: - raise ValueError(f"Rounding mode {mode} not supported.") + raise ValueError(f'Rounding mode {mode} not supported.') diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 8c182f4cca..f9e75a7d87 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -81,7 +81,6 @@ class IntegerPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) - self.integer = width self.fractional = 0 def __str__(self): @@ -96,6 +95,22 @@ def __eq__(self, other): eq = eq and self.fractional == other.fractional return eq + @property + def integer(self): + return self.width + + @property + def rounding_mode(self): + return RoundingMode.TRN + + @property + def saturation_mode(self): + return SaturationMode.WRAP + + @property + def saturation_bits(self): + return None + class FixedPrecisionType(PrecisionType): """Arbitrary precision fixed-point data type. 
@@ -114,11 +129,15 @@ class FixedPrecisionType(PrecisionType): def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturation_mode=None, saturation_bits=None): super().__init__(width=width, signed=signed) self.integer = integer - self.fractional = width - integer self.rounding_mode = rounding_mode self.saturation_mode = saturation_mode self.saturation_bits = saturation_bits + # make this a property to avoid inconsistencies + @property + def fractional(self): + self.width - self.integer + @property def rounding_mode(self): return self._rounding_mode From 81f3e53533984ca67e24a1bd485b3135910e9e2e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 7 Feb 2024 19:44:00 -0600 Subject: [PATCH 17/62] bug fixes from attempting to run --- hls4ml/model/layers.py | 2 +- hls4ml/model/optimizer/__init__.py | 1 + .../model/optimizer/passes/batchnorm_opt.py | 29 +++++++++ hls4ml/model/optimizer/passes/bn_fuse.py | 65 +++++++++++++++++-- hls4ml/model/optimizer/passes/linear.py | 12 ++-- .../optimizer/passes/matmul_const_to_dense.py | 1 + hls4ml/model/optimizer/passes/merge_const.py | 13 ++-- hls4ml/model/optimizer/passes/move_scales.py | 4 +- hls4ml/model/quantizers.py | 6 +- hls4ml/model/types.py | 17 +++-- 10 files changed, 124 insertions(+), 26 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index b5d9f492af..ebf7af2124 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -403,7 +403,7 @@ def initialize(self): # for QONNX, remove batch dimension # (onnx cleaning should have removed reshapes not on data path) if isinstance(shape_node, Constant): - target_shape = shape_node.value[1:] + target_shape = shape_node.attributes['value'][1:] else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index ebe4934029..bd4da19071 100644 --- a/hls4ml/model/optimizer/__init__.py +++ 
b/hls4ml/model/optimizer/__init__.py @@ -69,6 +69,7 @@ 'eliminate_linear_activation', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', + 'remove_nop_batch_normalization', 'replace_multidimensional_dense_with_conv', 'infer_precision_types', 'set_precision_concat', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 3e0984dccb..f633d763c8 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -194,6 +194,15 @@ def transform(self, model, node): # if len(node_map[node.outputs[0]]) > 1: # return False + # only merge if the types are integer or fixed + if ( + not isinstance(prev_node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(prev_node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + ): + return False + s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized @@ -239,3 +248,23 @@ def transform(self, model, node): model.remove_node(prev_node, rewire=True) return True + + +class RemoveNopBatchNormalization(OptimizerPass): + """ + OptimizerPass to remove batch normalizations that do nothing (scale 1, bias 0) + + Note: This optimizer may not be safe if weights are updateable. 
+ """ + + def match(self, node): + if isinstance(node, BatchNormalization): + s0 = node.weights['scale'].data_unquantized + b0 = node.weights['bias'].data_unquantized + return (s0 == np.ones_like(s0)).all() and (b0 == np.zeros_like(b0)).all() + else: + return False + + def transform(self, model, node): + model.remove_node(node, rewire=True) + return True diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index 3d79de7dc8..a636af2f86 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -2,10 +2,19 @@ from hls4ml.model.layers import BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import UnspecifiedPrecisionType +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType class FuseBatchNormalization(OptimizerPass): + """ + OptimizerPass to merge BatchNormalization layers, + only if the earlier one does not have quantization specified + + Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + + Note: This optimizer may not be safe if weights are updateable. May need to turn off. 
+ """ + def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( @@ -51,13 +60,59 @@ def transform(self, model, node): bn_scale = node.weights['scale'] bn_bias = node.weights['bias'] + # only merge if the types are integer or fixed + if ( + not isinstance(parent_weight.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(parent_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(bn_scale.type, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(bn_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + ): + return False + fused_weight = bn_scale.data * parent_weight.data fused_bias = bn_scale.data * parent_bias.data + bn_bias.data + w_quantizer = ( + node.get_attr('scale_quantizer') + if (parent_weight.data == np.ones_like(parent_weight.data)).all() + else parent_node.get_attr('weight_quantizer') + ) + b_quantizer = ( + node.get_attr('bias_quantizer') + if (parent_bias.data == np.zeros_like(parent_bias.data)).all() + else parent_node.get_attr('bias_quantizer') + ) + + node.set_attr('weight_quantizer', w_quantizer) + node.set_attr('bias_quantizer', b_quantizer) + + # Not sure if this setting of this is useful + w_prec = None + if w_quantizer is None and (fused_weight == np.ones_like(fused_weight)).all(): + if ( + isinstance(parent_weight.type, IntegerPrecisionType) + and isinstance(bn_scale.type, IntegerPrecisionType) + and parent_weight.type.width == 1 + and bn_scale.type.width == 1 + ): + w_prec = node.weights['scale'].type + + b_prec = None + if b_quantizer is None and (fused_bias == np.zeros_like(fused_bias)).all(): + if ( + isinstance(parent_bias.type, IntegerPrecisionType) + and isinstance(bn_bias.type, IntegerPrecisionType) + and parent_bias.type.width == 1 + and bn_bias.type.width == 1 + ): + b_prec = node.weights['bias'].type + + # call function so that quantizer would be called if needed + node.add_weights_variable( + name='weight', var_name='w{index}', data=fused_weight, 
quantizer=w_quantizer, precision=w_prec + ) + node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer, precision=b_prec) + model.remove_node(node, rewire=True) - parent_weight.data = fused_weight - parent_bias.data = fused_bias - if not parent_node.get_attr('use_bias', True): - parent_bias.update_precision(bn_bias.type.precision) return True diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 72d6dade9f..78a808b9a1 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -1,5 +1,6 @@ from hls4ml.model.layers import Activation, BatchNormalization, Conv1D, Conv2D, Dense from hls4ml.model.optimizer import OptimizerPass +from hls4ml.model.types import UnspecifiedPrecisionType class EliminateLinearActivation(OptimizerPass): @@ -14,7 +15,6 @@ def transform(self, model, node): return True -# TODO: Move migrate this to auto precisoin check from quant precision check class MergeLinearActivation(OptimizerPass): ''' For many objects it's safe to change the output precision independently of the calculation. 
@@ -27,16 +27,14 @@ def match(self, node): if isinstance(node, Activation) and node.get_attr('activation') == 'linear': parent = node.get_input_node(node.inputs[0]) safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) - parent_type_fixed = parent.get_attr("quant_precision") - return safe_parent and not parent_type_fixed + return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) else: return False def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) - quant_precision = node.get_attr("quant_precision") - prev_node.set_attr("quant_precision", quant_precision) - prev_node.set_attr("quantizer", node.get_attr("quantizer")) - prev_node.update_output_precision(quant_precision) + quantizer = node.get_attr("quantizer") + prev_node.set_attr("quantizer", quantizer) + prev_node.update_output_precision(quantizer.hls_type) model.remove_node(node) return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 7eac0ccca3..889a376cee 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -54,6 +54,7 @@ def transform(self, model, node): # removing and replacing old nodes model.remove_node(const_node, rewire=False) + del node.inputs[1] model.replace_node(node, new_dense) return True diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 11848c9081..8ffe053866 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -91,9 +91,11 @@ def transform(self, model, node): if node1const: const_node = node1 input_node_idx = 0 + const_node_idx = 1 else: const_node = node.get_input_node(node.inputs[0]) input_node_idx = 1 + const_node_idx = 0 input_shape = node.get_input_variable(node.inputs[input_node_idx]).shape n_in = 
np.prod(input_shape) @@ -108,14 +110,14 @@ def transform(self, model, node): if op in ('add', 'sum'): scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) - bias = const_node.attribute['value'] + bias = const_node.attributes['value'] bias_quantizer = const_node.get_attr('quantizer') elif op == 'sub': bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) - bias = -const_node.attribute['value'] + bias = -const_node.attributes['value'] if ( bias_quantizer is not None and isinstance(bias_quantizer.hls_type, (IntegerPrecisionType, FixedPrecisionType)) @@ -134,10 +136,10 @@ def transform(self, model, node): else: scale = np.array(-1) scale_precision = IntegerPrecisionType(2, True) - bias = const_node.attribute['value'] + bias = const_node.attributes['value'] elif op == 'mul': - scale = const_node.attribute['value'] + scale = const_node.attributes['value'] scale_quantizer = const_node.get_attr('quantizer') bias = np.array(0) bias_precision = IntegerPrecisionType(1, False) @@ -168,6 +170,7 @@ def transform(self, model, node): ) model.remove_node(const_node, rewire=False) + del node.inputs[const_node_idx] model.replace_node(node, aa_layer) return True @@ -193,7 +196,7 @@ def transform(self, model, node): input_shape = node.get_input_variable().shape n_in = np.prod(input_shape) const_node = node.get_input_node(node.inputs[1]) - scale = 1 / const_node.attribute['value'] + scale = 1 / const_node.attributes['value'] scale_quantizer = const_node.get_attr('quantizer') if scale_quantizer: scale_precision = scale_quantizer.hls_type diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index e97fd89947..fe1acb7f94 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -16,7 +16,7 @@ class ScaleDownMatMul(OptimizerPass): def match(self, node): ''' Check to see if we have a MatMul 
with at least one input ApplyAlpha. - Note, if both are this optimition runs twice. + Note, if both are this optimizer runs twice. ''' is_match = ( isinstance(node, MatMul) @@ -85,7 +85,7 @@ def transform(self, model, node): try: np.broadcast_to(scale, output.shape) # check size compatibility newscale = scale - newbias = inp[other_idx].value * bias + newbias = inp[other_idx].attributes['value'] * bias np.broadcast_to(newbias, output.shape) can_propagate = True except ValueError: diff --git a/hls4ml/model/quantizers.py b/hls4ml/model/quantizers.py index cadcdbbc3d..daae66fe45 100644 --- a/hls4ml/model/quantizers.py +++ b/hls4ml/model/quantizers.py @@ -215,8 +215,10 @@ def _min_int(signed: bool, saturation_mode: str, bit_width: int) -> int: >>> min_int(signed=False, saturation_mode='AP_SAT_SYM', bit_width=8) int(0) """ - if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT): - raise ValueError(f'Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported') + if saturation_mode not in (SaturationMode.SAT_SYM, SaturationMode.SAT, SaturationMode.WRAP): + raise ValueError( + f'Saturation mode {saturation_mode} not supported. Only AP_SAT_SYM, AP_SAT supported, WRAP partially' + ) if signed and saturation_mode == SaturationMode.SAT_SYM: value = -(2 ** (bit_width - 1)) + 1 elif signed: diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index f9e75a7d87..9fe6867262 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -81,12 +81,12 @@ class IntegerPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) - self.fractional = 0 def __str__(self): typestring = '{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width) return typestring + # Does this need to make sure other is also an IntegerPrecisionType? 
I could see a match between Fixed and Integer def __eq__(self, other): eq = self.width == other.width eq = eq and self.signed == other.signed @@ -99,6 +99,10 @@ def __eq__(self, other): def integer(self): return self.width + @property + def fractional(self): + return 0 + @property def rounding_mode(self): return RoundingMode.TRN @@ -134,9 +138,10 @@ def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturat self.saturation_bits = saturation_bits # make this a property to avoid inconsistencies + @property def fractional(self): - self.width - self.integer + return self.width - self.integer @property def rounding_mode(self): @@ -144,7 +149,9 @@ def rounding_mode(self): @rounding_mode.setter def rounding_mode(self, mode): - if isinstance(mode, str): + if mode is None: + self._rounding_mode = RoundingMode.TRN + elif isinstance(mode, str): self._rounding_mode = RoundingMode.from_string(mode) else: self._rounding_mode = mode @@ -155,7 +162,9 @@ def saturation_mode(self): @saturation_mode.setter def saturation_mode(self, mode): - if isinstance(mode, str): + if mode is None: + self._saturation_mode = SaturationMode.WRAP + elif isinstance(mode, str): self._saturation_mode = SaturationMode.from_string(mode) else: self._saturation_mode = mode From 9a74e46e33a715054496b408870675a35d4e19df Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 11 Feb 2024 18:07:24 -0600 Subject: [PATCH 18/62] fix some bugs from qonnx pytest --- .../model/optimizer/passes/batchnorm_opt.py | 19 ++++++++++--------- hls4ml/model/optimizer/passes/merge_const.py | 2 ++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index f633d763c8..ee00ecfa46 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,6 +1,6 @@ import numpy as np -from hls4ml.model.layers import ApplyAlpha, BatchNormalization, BatchNormOnnx, 
Constant +from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant from hls4ml.model.optimizer import OptimizerPass from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType @@ -33,7 +33,7 @@ def transform(self, model, node): raise TypeError('Only consant gammas supported') gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma - attributes['gamma_quantizer'] = gamma_node.get_attr['quantizer'] + attributes['gamma_quantizer'] = gamma_node.get_attr('quantizer') node.inputs[1] = '' model.remove_node(gamma_node, rewire=False) @@ -43,7 +43,7 @@ def transform(self, model, node): raise TypeError('Only consant betas supported') beta = beta_node.attributes['value'] attributes['beta_data'] = beta - attributes['beta_quantizer'] = beta_node.get_attr['quantizer'] + attributes['beta_quantizer'] = beta_node.get_attr('quantizer') node.inputs[2] = '' model.remove_node(beta_node, rewire=False) @@ -52,7 +52,7 @@ def transform(self, model, node): raise TypeError('Only consant moving_means supported') moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean - attributes['mean_quantizer'] = moving_mean_node.get_attr['quantizer'] + attributes['mean_quantizer'] = moving_mean_node.get_attr('quantizer') node.inputs[3] = '' model.remove_node(moving_mean_node, rewire=False) @@ -61,13 +61,13 @@ def transform(self, model, node): raise TypeError('Only consant moving_variances supported') moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = moving_variance - attributes['variance_quantizer'] = moving_variance_node.get_attr['quantizer'] + attributes['variance_quantizer'] = moving_variance_node.get_attr('quantizer') node.inputs[4] = '' model.remove_node(moving_variance_node, rewire=False) node.inputs = [inp for inp in node.inputs if inp] if len(node.inputs) != 1: - raise RuntimeError('The QONNX batchnomr 
had unexpected inputs.') + raise RuntimeError('The QONNX batchnorm had unexpected inputs.') new_node = model.make_node(BatchNormalization, node.name, attributes, [node.inputs[0]], [x for x in node.outputs]) @@ -76,6 +76,7 @@ def transform(self, model, node): return True +# Most likely this case is removed by qonnx cleaning class ConstantBatchNormFusion(OptimizerPass): """ Merge BatchNorm into Const (after parameters have already been merged in BatchNormalization) @@ -149,7 +150,7 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have quantization specified - Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. + Note: Consider restricting this to ApplyAlpha. Batch Normalization-style quantization seems to be ignored. Note: This optimizer may not be safe if weights are updateable. May need to turn off. """ @@ -157,8 +158,8 @@ class FuseConsecutiveBatchNormalization(OptimizerPass): def match(self, node): prev_node = node.get_input_node(node.inputs[0]) basic_match = ( - isinstance(node, ApplyAlpha) - and isinstance(prev_node, ApplyAlpha) + isinstance(node, BatchNormalization) + and isinstance(prev_node, BatchNormalization) and isinstance(prev_node.get_output_variable().type.precision, UnspecifiedPrecisionType) ) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 8ffe053866..25bd59bda6 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -8,6 +8,7 @@ _base_attributes = ('Trace', 'reuse_factor', 'n_in') +# This should generally not happen because of qonnx cleaning class MergeTwoConstants(OptimizerPass): """Merge of two constants makes another constant""" @@ -237,6 +238,7 @@ def transform(self, model, node): bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x 
in node.outputs]) model.remove_node(const_node, rewire=False) + del node.inputs[1] model.replace_node(node, bn_layer) return True From 60a74bb49e906149f64401678bcb7f0ba4e4eff4 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 12 Feb 2024 09:59:48 -0600 Subject: [PATCH 19/62] fix assertion of not matching the number of inputs when replacing node --- hls4ml/model/optimizer/passes/conv_to_convxd.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index b61b0340be..e54c98c1d7 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -81,9 +81,11 @@ def transform(self, model, node): ) # removing and replacing old nodes - model.remove_node(weight_node, rewire=False) if bias_node: model.remove_node(bias_node, rewire=False) + del node.inputs[2] + model.remove_node(weight_node, rewire=False) + del node.inputs[1] model.replace_node(node, new_node) return True From 88a8d351b158145ef2d1f6d0a9daed9b159a7241 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 29 Feb 2024 16:36:54 -0600 Subject: [PATCH 20/62] update some precisions inference --- .../model/optimizer/passes/infer_precision.py | 121 ++++++++++++++++-- 1 file changed, 109 insertions(+), 12 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 6f6a72097f..c660647d3b 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -1,11 +1,12 @@ import math -from copy import deepcopy import numpy as np from hls4ml.model.optimizer import ConfigurableOptimizerPass from hls4ml.model.types import FixedPrecisionType, UnspecifiedPrecisionType +# TODO: The code assumes everything is Fixed or Integer precision. 
Need to add checks + class InferPrecisionTypes(ConfigurableOptimizerPass): def __init__(self): @@ -36,7 +37,7 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Dense']: return self._infer_dense_precision(node, types_to_infer) - if node_class in ['BatchNormalization']: + if node_class in ['BatchNormalization', 'ApplyAlpha']: return self._infer_bn_precision(node, types_to_infer) if node_class in ['Conv1D', 'Conv2D', 'PointwiseConv1D', 'PointwiseConv2D', 'Conv2DBatchnorm']: @@ -51,9 +52,15 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Clone', 'Reshape', 'Resize', 'Transpose', 'ZeroPadding1D', 'ZeroPadding2D']: return self._infer_output_matching_precision(node, types_to_infer) - if node_class in ['Concatenate', 'Merge']: + if node_class in ['Merge']: return self._infer_merge_precision(node, types_to_infer) + if node_class in ['Concatenate']: + return self._infer_cat_precision(node, types_to_infer) + + if node_class in ['Dot']: + return self._infer_dot_precision(node, types_to_infer) + # What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent # this in config_from_* functions @@ -124,6 +131,7 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): bitwidth = integers + max(frac, bias_width - bias_integers) signed = signed or bias_signed + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. new_type = FixedPrecisionType(bitwidth, integers, signed) if 'accum_t' in types_to_infer: @@ -225,6 +233,11 @@ def _infer_sepconv_precision(self, node, types_to_infer): return inferred_types def _infer_bn_precision(self, node, types_to_infer): + """ + The batchnormalziation precision here is the more implementation-focused version. It propagates + precision from scale and bias, not mean, variance, etc. 
+ """ + inferred_types = [] if 'scale_t' in types_to_infer: @@ -238,16 +251,28 @@ def _infer_bn_precision(self, node, types_to_infer): inferred_types.append('bias_t') if 'result_t' in types_to_infer: + input_precision = node.get_input_variable().type.precision scale_precision = node.types['scale_t'].precision bias_precision = node.types['bias_t'].precision - out_precision = deepcopy(node.get_input_node().get_output_variable().type.precision) - out_precision.integer += scale_precision.integer - out_precision.fractional = max(out_precision.fractional, scale_precision.fractional) + after_scale_signed = scale_precision.signed or input_precision.signed + after_scale_width = input_precision.width + scale_precision.width + after_scale_integer = input_precision.integer + scale_precision.integer + + out_precision_signed = after_scale_signed or bias_precision.signed + out_precision_integer = ( + max( + after_scale_integer + (bias_precision.signed and not after_scale_signed), + bias_precision.integer + (after_scale_signed and not bias_precision.signed), + ) + + 1 + ) + out_precision_width = out_precision_integer + max( + after_scale_width - after_scale_integer, bias_precision.fractional + ) - out_precision.integer = max(out_precision.integer, bias_precision.integer) + 1 - out_precision.fractional = max(out_precision.fractional, bias_precision.fractional) - out_precision.width = out_precision.fractional + out_precision.integer + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. 
+ out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -288,10 +313,82 @@ def _infer_merge_precision(self, node, types_to_infer): input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - new_width = max(input_1.fractional, input_2.fractional) + max(input_1.integer, input_2.integer) - new_int = max(input_1.integer, input_2.integer) + op = node.get_attr('op').lower() + if op in ('add', 'subtract', 'average'): + new_signed = input_1.signed or input_2.signed or op == 'subtract' + new_int = ( + max( + input_1.integer + (input_2.signed and not input_1.signed), + input_2.integer + (input_1.signed and not input_2.signed), + ) + + 1 + ) + new_width = new_int + max(input_1.fractional, input_2.fractional) + + elif op == 'multiply': + new_signed = input_1.signed or input_2.signed + new_int = input_1.integer + input_2.integer + new_width = input_1.width + input_2.width + elif op in ('maximum', 'minimum'): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_cat_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = 
node.get_input_variable(node.inputs[1]).type.precision + + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_dot_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = node.get_input_variable(node.inputs[1]).type.precision + + n_in = node.get_input_variable(node.inputs[0]).shape[0] + + new_signed = input_1.signed or input_2.signed + new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) + new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) - out_precision = FixedPrecisionType(new_width, new_int) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision From 10a3c500b79ad1b4fded24c860f41ad9732a4afb Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 29 Feb 2024 17:33:24 -0600 Subject: [PATCH 21/62] extract bitwidth from size 1 array in quant node --- hls4ml/model/optimizer/passes/quant_opt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 79d92ec4d1..0d02124bc6 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py 
@@ -13,6 +13,7 @@ and Linear nodes are immediately merged into the Constant. """ + import math # prefer to use math.ceil for scalar values import numpy as np @@ -67,7 +68,7 @@ def transform(self, model, node): bitwidth = bitwidth_node.get_attr('value') if bitwidth.size != 1: raise RuntimeError('Only scalar bitwidth values are supporeted by the Quant node') - node.set_attr('bitwidth', bitwidth) + node.set_attr('bitwidth', bitwidth[0]) node.inputs[3] = '' model.remove_node(bitwidth_node, rewire=False) From ab8d67b2ce9318106203d99dba12533570f0494d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 1 Mar 2024 19:55:08 -0600 Subject: [PATCH 22/62] update automatic onnx configuration --- hls4ml/converters/__init__.py | 1 + hls4ml/converters/onnx_to_hls.py | 38 +++++++++++++----- hls4ml/model/graph.py | 3 +- hls4ml/utils/config.py | 66 ++++++++++++++++++++++++++++++-- test/pytest/test_qonnx.py | 6 +-- 5 files changed, 98 insertions(+), 16 deletions(-) diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index 0bc7ccdbe7..b7bcb05b9e 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -10,6 +10,7 @@ from hls4ml.converters.keras_to_hls import get_supported_keras_layers # noqa: F401 from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler +from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config from hls4ml.utils.symbolic_utils import LUTFunction diff --git a/hls4ml/converters/onnx_to_hls.py b/hls4ml/converters/onnx_to_hls.py index 8f6c7461fb..75850fa93e 100644 --- a/hls4ml/converters/onnx_to_hls.py +++ b/hls4ml/converters/onnx_to_hls.py @@ -162,26 +162,23 @@ def get_out_layer_name(graph): return [node.name for node in graph.node if node.output[0] in output_index_list] -def onnx_to_hls(config): - """Convert onnx model to 
hls model from configuration. +def parse_onnx_model(onnx_model): + """Parses the onnx model, both for configuration building and general processing. Args: - config (dict): ONNX configuration from yaml file or passed through API. + onnx_model: an ONNX model object. Raises: Exception: Raised if an unsupported operation is found in the ONNX model. Returns: - ModelGraph: hls4ml model object + layer_list (list): The onnx layers + input_layers (list): The input layers + output_layers (list): The output layers """ # This is a list of dictionaries to hold all the layer info we need to generate HLS layer_list = [] - # Extract model architecture - print('Interpreting Model ...') - - onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] - # We don't infer the shapes because the qonnx package preprocessing does it. # Obtain list of input/ouput layers @@ -257,6 +254,29 @@ def onnx_to_hls(config): print(f"Layer name: {layer['name']}, layer type: {layer['class_name']}, current shape: {input_shapes}") layer_list.append(layer) + return layer_list, input_layers, output_layers + + +def onnx_to_hls(config): + """Convert onnx model to hls model from configuration. + + Args: + config (dict): ONNX configuration from yaml file or passed through API. + + Raises: + Exception: Raised if an unsupported operation is found in the ONNX model. 
+ + Returns: + ModelGraph: hls4ml model object + """ + + # Extract model architecture + print('Interpreting Model ...') + + onnx_model = onnx.load(config['OnnxModel']) if isinstance(config['OnnxModel'], str) else config['OnnxModel'] + + layer_list, input_layers, output_layers = parse_onnx_model(onnx_model) + ################# # Generate HLS ################# diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index a6b5c29e89..f0d29237b7 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -99,7 +99,8 @@ def get_precision(self, layer, var='default'): type_name = layer.name.lower() + '_' + var + '_t' if precision is None: precision = self.layer_name_precision.get(layer.name.lower() + '_default') - type_name = layer.name.lower() + '_default_t' + # I think it is better to keep these unique still to avoid inadvertent updates + # type_name = layer.name.lower() + '_default_t' if precision is None: precision = self.layer_type_precision.get(layer.class_name.lower() + '_' + var) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 5d7ca1ae72..5c41a52a82 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -321,7 +321,7 @@ def config_from_pytorch_model( def config_from_onnx_model( - model, granularity='model', backend=None, default_precision='ap_fixed<16,6>', default_reuse_factor=1 + model, granularity='name', backend=None, default_precision='ap_fixed<16,6>', default_reuse_factor=1 ): """Create an HLS conversion config given the ONNX model. @@ -331,8 +331,8 @@ def config_from_onnx_model( Args: model: ONNX model - granularity (str, optional): Granularity of the created config. Defaults to 'model'. - Can be set to 'model', 'type' and 'layer'. + granularity (str, optional): Granularity of the created config. Defaults to 'name'. + Can be set to 'model', 'type' and 'name'. Granularity can be used to generate a more verbose config that can be fine-tuned. 
The default granularity ('model') will generate config keys that apply to the whole @@ -351,6 +351,16 @@ def config_from_onnx_model( [dict]: The created config. """ + if granularity.lower() not in ['model', 'type', 'name']: + raise Exception( + f'Invalid configuration granularity specified, expected "model", "type" or "name" got "{granularity}"' + ) + + if backend is not None: + backend = hls4ml.backends.get_backend(backend) + elif granularity.lower() != 'model': + print('Warning: it is recommended to pass the backend to "config_from_onnx_model"') + config = {} model_config = {} @@ -360,4 +370,54 @@ def config_from_onnx_model( config['Model'] = model_config + layer_list, _, _ = hls4ml.converters.parse_onnx_model(model) + + def make_layer_config(layer): + cls_name = layer['class_name'] + + layer_cls = hls4ml.model.layers.layer_map[cls_name] + if backend is not None: + layer_cls = backend.create_layer_class(layer_cls) + + layer_config = {} + + # set the default precision of the layer to auto? 
+ # (not really necessary if we set the backend appropriately) + # layer_config['Precision'] = {'default': 'auto'} + + config_attrs = [a for a in layer_cls.expected_attributes if a.configurable] + for attr in config_attrs: + if isinstance(attr, hls4ml.model.attributes.TypeAttribute): + precision_cfg = layer_config.setdefault('Precision', {}) + name = attr.name + if name.endswith('_t'): + name = name[:-2] + if attr.default is None: + precision_cfg[name] = 'auto' + else: + precision_cfg[name] = str(attr.default) + else: + if attr.default is not None: + layer_config[attr.config_name] = attr.default + + return layer_config + + if granularity.lower() == 'type': + type_config = {} + for layer in layer_list: + if layer['class_name'] in type_config: + continue + layer_config = make_layer_config(layer) + type_config[layer['class_name']] = layer_config + + config['LayerType'] = type_config + + elif granularity.lower() == 'name': + name_config = {} + for layer in layer_list: + layer_config = make_layer_config(layer) + name_config[layer['name']] = layer_config + + config['LayerName'] = name_config + return config diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 2c314c13ca..529a5adebc 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -93,7 +93,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend) # Some hand-derived config config['LayerName'] = {} config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} @@ -116,7 +116,7 @@ def test_cnv_2w2a(cnv_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, default_precision='fixed<32,16>') + config = 
hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), @@ -142,7 +142,7 @@ def test_jet_tagging(jettagging_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend) hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config From 0a863adcc25b5facae2b9b375bf4c7fa1bc41ecc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sat, 2 Mar 2024 12:49:43 -0600 Subject: [PATCH 23/62] standardize on merge operators --- hls4ml/converters/onnx/merge.py | 14 +++++++++++- .../model/optimizer/passes/infer_precision.py | 12 ++++++++-- hls4ml/model/optimizer/passes/merge_const.py | 22 +++++++++---------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/hls4ml/converters/onnx/merge.py b/hls4ml/converters/onnx/merge.py index 2309cc213f..420f077ec2 100644 --- a/hls4ml/converters/onnx/merge.py +++ b/hls4ml/converters/onnx/merge.py @@ -2,13 +2,25 @@ merge_layers = ['Add', 'Sub', 'Mul', 'Div', 'Average', 'Max', 'Min', 'Concat', 'Sum'] +op_map = { + 'Add': 'add', + 'Sub': 'subtract', + 'Mul': 'multiply', + 'Div': 'divide', + 'Average': 'average', + 'Max': 'maximum', + 'Min': 'minimum', + 'Sum': 'add', + 'Concat': 'concat', +} + @onnx_handler(*merge_layers) def parse_merge_layer(node, input_names, input_shapes, graph): layer = {} layer['class_name'] = node.op_type layer['name'] = node.name - layer['op'] = layer['class_name'].lower() + layer['op'] = op_map[node.op_type] layer['inputs'] = input_names layer['outputs'] = list(node.output) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py 
b/hls4ml/model/optimizer/passes/infer_precision.py index c660647d3b..d1629a7a13 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -66,6 +66,10 @@ def _infer_precision(self, node, types_to_infer): return [] + def _get_default_precision(self, node): + model_config = node.model.config + return model_config.backend.convert_precision_string(model_config.model_precision['default']) + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -324,11 +328,12 @@ def _infer_merge_precision(self, node, types_to_infer): + 1 ) new_width = new_int + max(input_1.fractional, input_2.fractional) - + out_precision = FixedPrecisionType(new_width, new_int, new_signed) elif op == 'multiply': new_signed = input_1.signed or input_2.signed new_int = input_1.integer + input_2.integer new_width = input_1.width + input_2.width + out_precision = FixedPrecisionType(new_width, new_int, new_signed) elif op in ('maximum', 'minimum'): new_signed = input_1.signed or input_2.signed @@ -343,8 +348,11 @@ def _infer_merge_precision(self, node, types_to_infer): new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + print(f'Warning: not propagating weights for type {op}') + out_precision = self._get_default_precision(node) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 25bd59bda6..54f275d9ec 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -35,19 
+35,19 @@ def transform(self, model, node): val1 = const_node1.attributes['value'] op = node.attributes['op'] - if op in ('add', 'sum'): + if op == 'add': new_val = val0 + val1 - elif op == 'sub': + elif op == 'subtract': new_val = val0 - val1 - elif op == 'mul': + elif op == 'multiply': new_val = val0 * val1 - elif op == 'div': + elif op == 'divide': new_val = val0 / val1 elif op == 'average': new_val = np.mean(np.array([val0, val1]), axis=0) - elif op == 'max': + elif op == 'maximum': new_val = np.maximum(val0, val1) - elif op == 'min': + elif op == 'minimum': new_val = np.minimum(val0, val1) else: raise RuntimeError(f'Unexpected op_type: {op}') @@ -76,7 +76,7 @@ class MergeToApplyAlpha(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes['op'] in ('add', 'sum', 'sub', 'mul') # Div is separate + and node.attributes['op'] in ('add', 'subtract', 'multiply') # Div is separate and ( isinstance(node.get_input_node(node.inputs[0]), Constant) != isinstance(node.get_input_node(node.inputs[1]), Constant) @@ -108,12 +108,12 @@ def transform(self, model, node): bias_quantizer = None op = node.attributes['op'] - if op in ('add', 'sum'): + if op == 'add': scale = np.array(1) scale_precision = IntegerPrecisionType(1, False) bias = const_node.attributes['value'] bias_quantizer = const_node.get_attr('quantizer') - elif op == 'sub': + elif op == 'subtract': bias_quantizer = const_node.get_attr('quantizer') if node1const: scale = np.array(1) @@ -139,7 +139,7 @@ def transform(self, model, node): scale_precision = IntegerPrecisionType(2, True) bias = const_node.attributes['value'] - elif op == 'mul': + elif op == 'multiply': scale = const_node.attributes['value'] scale_quantizer = const_node.get_attr('quantizer') bias = np.array(0) @@ -187,7 +187,7 @@ class MergeToApplyAlphaDiv(OptimizerPass): def match(self, node): is_match = ( isinstance(node, Merge) - and node.attributes['op'] == 'div' + and node.attributes['op'] == 'divide' and 
isinstance(node.get_input_node(node.inputs[1]), Constant) ) # only second can be const From bfe6a3f6650705ac2a845949b654f48fdb86acfa Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 7 Mar 2024 19:52:01 -0600 Subject: [PATCH 24/62] snapshot of current work --- hls4ml/model/graph.py | 61 +++++++++------- hls4ml/model/layers.py | 5 -- hls4ml/model/optimizer/__init__.py | 17 +++-- .../model/optimizer/passes/batchnorm_opt.py | 2 +- hls4ml/model/optimizer/passes/bn_fuse.py | 49 ++++--------- .../model/optimizer/passes/conv_to_convxd.py | 20 +++--- .../model/optimizer/passes/infer_precision.py | 12 +++- hls4ml/model/optimizer/passes/linear.py | 8 ++- .../optimizer/passes/matmul_const_to_dense.py | 32 ++++----- hls4ml/model/optimizer/passes/merge_const.py | 72 ++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 65 ++++++++++++----- hls4ml/model/types.py | 26 +------ 12 files changed, 185 insertions(+), 184 deletions(-) diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py index f0d29237b7..33b367a929 100644 --- a/hls4ml/model/graph.py +++ b/hls4ml/model/graph.py @@ -94,6 +94,11 @@ def get_layer_config(self, layer): return layer_config + def set_name_config(self, name, config): + hls_config = self.config['HLSConfig'] + layer_config = hls_config.setdefault('LayerName', {}) + layer_config[name] = config + def get_precision(self, layer, var='default'): precision = self.layer_name_precision.get(layer.name.lower() + '_' + var) type_name = layer.name.lower() + '_' + var + '_t' @@ -178,6 +183,35 @@ def get_compression(self, layer): return compression + def parse_name_config(self, layer_name, layer_cfg): + """This is used by _parse_hls_config below, but also in optimizers when a new layer config is created""" + precision_cfg = layer_cfg.get('Precision') + if isinstance(precision_cfg, dict): + for var, precision in precision_cfg.items(): + self.layer_name_precision[layer_name.lower() + '_' + var] = precision + else: + 
self.layer_name_precision[layer_name.lower() + '_default'] = precision_cfg + + rf = layer_cfg.get('ReuseFactor') + if rf is not None: + self.layer_name_rf[layer_name.lower()] = rf + + targ_cycles = layer_cfg.get('TargetCycles') + if targ_cycles is not None: + self.layer_name_targ_cycles[layer_name.lower()] = targ_cycles + + strategy = layer_cfg.get('Strategy') + if strategy is not None: + self.layer_name_strategy[layer_name.lower()] = strategy + + conv_implementation = layer_cfg.get('ConvImplementation') + if conv_implementation is not None: + self.layer_name_conv_implementation[layer_name.lower()] = conv_implementation + + compression = layer_cfg.get('Compression') + if compression is not None: + self.layer_name_compression[layer_name.lower()] = bool(compression) + def _parse_hls_config(self): hls_config = self.config['HLSConfig'] @@ -250,32 +284,7 @@ def _parse_hls_config(self): layer_name_cfg = hls_config.get('LayerName') if layer_name_cfg is not None: for layer_name, layer_cfg in layer_name_cfg.items(): - precision_cfg = layer_cfg.get('Precision') - if isinstance(precision_cfg, dict): - for var, precision in precision_cfg.items(): - self.layer_name_precision[layer_name.lower() + '_' + var] = precision - else: - self.layer_name_precision[layer_name.lower() + '_default'] = precision_cfg - - rf = layer_cfg.get('ReuseFactor') - if rf is not None: - self.layer_name_rf[layer_name.lower()] = rf - - targ_cycles = layer_cfg.get('TargetCycles') - if targ_cycles is not None: - self.layer_name_targ_cycles[layer_name.lower()] = targ_cycles - - strategy = layer_cfg.get('Strategy') - if strategy is not None: - self.layer_name_strategy[layer_name.lower()] = strategy - - conv_implementation = layer_cfg.get('ConvImplementation') - if conv_implementation is not None: - self.layer_name_conv_implementation[layer_name.lower()] = conv_implementation - - compression = layer_cfg.get('Compression') - if compression is not None: - self.layer_name_compression[layer_name.lower()] = 
bool(compression) + self.parse_name_config(layer_name, layer_cfg) def _validate_hls_config(self): use_dataflow = False diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index d0ac7e5561..a5130fa7bb 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -249,11 +249,6 @@ def add_output_variable( self.set_attr(out_name, out) - def update_output_precision(self, precision, output_name=None): - if output_name is None: - output_name = self.outputs[0] - self.variables[output_name].type.precision = precision - def add_weights(self, quantizer=None, compression=False): self.add_weights_variable( name='weight', var_name='w{index}', data='weight', quantizer=quantizer, compression=compression diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index bd4da19071..f05f8e3e04 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,11 +33,6 @@ register_flow( 'convert', [ - 'infer_precision_types', - 'channels_last_converter', - 'fuse_bias_add', - 'remove_useless_transpose', - 'expand_layer_group', 'reshape_constant', 'quant_constant_parameters', 'quant_to_activation', @@ -56,10 +51,17 @@ 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', + 'fuse_consecutive_batch_normalization', # needs to be before infer_precision_types + 'merge_linear_activation', # needs to be before infer_precision_types + 'fuse_batch_normalization', # needs to be before infer_precision_types + 'infer_precision_types', + 'channels_last_converter', + 'fuse_bias_add', + 'remove_useless_transpose', + 'expand_layer_group', 'output_rounding_saturation_mode', 'qkeras_factorize_alpha', 'extract_ternary_threshold', - 'fuse_consecutive_batch_normalization', ], ) # TODO Maybe not all QKeras optmizers belong here? 
@@ -67,13 +69,10 @@ 'optimize', [ 'eliminate_linear_activation', - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', 'remove_nop_batch_normalization', 'replace_multidimensional_dense_with_conv', 'infer_precision_types', 'set_precision_concat', - 'merge_linear_activation', ], requires=['convert'], ) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index ee00ecfa46..1800b33056 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -5,7 +5,7 @@ from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, UnspecifiedPrecisionType -_base_attributes = ('Trace', 'reuse_factor', 'epsilon', 'n_in', 'n_filt') +_base_attributes = ('epsilon', 'n_in', 'n_filt') class BatchNormOnnxConstantParameters(OptimizerPass): diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index a636af2f86..c84430f13f 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -29,13 +29,13 @@ def match(self, node): b1 = node.weights['bias'].data_unquantized scale_compatible = ( (prev_node.get_attr('weight_quantizer') is None and node.get_attr('scale_quantizer') is None) - or (s0 == np.ones_like(s0)).all() - or (s1 == np.ones_like(s1)).all() + or ((s0 == np.ones_like(s0)).all() and prev_node.get_attr('weight_quantizer') is None) + or ((s1 == np.ones_like(s1)).all() and node.get_attr('scale_quantizer') is None) ) bias_compatible = ( (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) - or (b0 == np.zeros_like(b0)).all() - or (b1 == np.zeros_like(b1)).all() + or ((b0 == np.zeros_like(b0)).all() and prev_node.get_attr('bias_quantizer') is None) + or ((b1 == np.zeros_like(b1)).all() and node.get_attr('bias_quantizer') is None) ) return scale_compatible and bias_compatible @@ 
-60,12 +60,14 @@ def transform(self, model, node): bn_scale = node.weights['scale'] bn_bias = node.weights['bias'] + allowed_precisions = (IntegerPrecisionType, FixedPrecisionType, UnspecifiedPrecisionType) + # only merge if the types are integer or fixed if ( - not isinstance(parent_weight.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(parent_bias.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(bn_scale.type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(bn_bias.type, (IntegerPrecisionType, FixedPrecisionType)) + not isinstance(parent_weight.type.precision, allowed_precisions) + or not isinstance(parent_bias.type.precision, allowed_precisions) + or not isinstance(bn_scale.type.precision, allowed_precisions) + or not isinstance(bn_bias.type.precision, allowed_precisions) ): return False @@ -74,44 +76,21 @@ def transform(self, model, node): w_quantizer = ( node.get_attr('scale_quantizer') - if (parent_weight.data == np.ones_like(parent_weight.data)).all() + if node.get_attr('scale_quantizer') is not None else parent_node.get_attr('weight_quantizer') ) b_quantizer = ( node.get_attr('bias_quantizer') - if (parent_bias.data == np.zeros_like(parent_bias.data)).all() + if node.get_attr('bias_quantizer') is not None else parent_node.get_attr('bias_quantizer') ) node.set_attr('weight_quantizer', w_quantizer) node.set_attr('bias_quantizer', b_quantizer) - # Not sure if this setting of this is useful - w_prec = None - if w_quantizer is None and (fused_weight == np.ones_like(fused_weight)).all(): - if ( - isinstance(parent_weight.type, IntegerPrecisionType) - and isinstance(bn_scale.type, IntegerPrecisionType) - and parent_weight.type.width == 1 - and bn_scale.type.width == 1 - ): - w_prec = node.weights['scale'].type - - b_prec = None - if b_quantizer is None and (fused_bias == np.zeros_like(fused_bias)).all(): - if ( - isinstance(parent_bias.type, IntegerPrecisionType) - and isinstance(bn_bias.type, 
IntegerPrecisionType) - and parent_bias.type.width == 1 - and bn_bias.type.width == 1 - ): - b_prec = node.weights['bias'].type - # call function so that quantizer would be called if needed - node.add_weights_variable( - name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer, precision=w_prec - ) - node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer, precision=b_prec) + node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) + node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) model.remove_node(node, rewire=True) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index e54c98c1d7..6fb88ad0d0 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -2,13 +2,9 @@ from hls4ml.model.layers import Constant, Conv, Conv1D, Conv2D from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.quantizers import QuantNodeQuantizer -from hls4ml.model.types import IntegerPrecisionType # these are attributes to copy _base_attributes = ( - 'Trace', - 'reuse_factor', 'in_width', 'out_width', 'n_chan', @@ -25,7 +21,6 @@ 'filt_height', 'stride_height', 'dilation_height', - 'strategy', 'data_format', ) @@ -69,16 +64,19 @@ def transform(self, model, node): if bias_node: attributes['bias_data'] = bias_node.attributes['value'] attributes['bias_quantizer'] = bias_node.get_attr('quantizer') - attributes['have_bias'] = True + attributes['use_bias'] = True else: attributes['bias_data'] = np.zeros(attributes['n_filt']) - attributes['bias_quantizer'] = QuantNodeQuantizer(IntegerPrecisionType(1, False)) - attributes['have_bias'] = False + attributes['use_bias'] = False + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'{newtype.__name__}_{node.name}' + 
model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) # making new node - new_node = model.make_node( - newtype, f'{newtype.__name__}_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs] - ) + new_node = model.make_node(newtype, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes if bias_node: diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index d1629a7a13..4de58a18c2 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -12,8 +12,18 @@ class InferPrecisionTypes(ConfigurableOptimizerPass): def __init__(self): # The option, infer_no_bias, allows you to tailor for the given weights, in particular, zero bias self.infer_no_bias = False + self.count = 0 + self.MAX_COUNT = 1000 def match(self, node): + input_var = node.get_input_variable() + if input_var is not None and isinstance(input_var.type, UnspecifiedPrecisionType): + # need to wait for the input to update + # but check for infinite loops + self.count += 1 + if self.count == self.MAX_COUNT: + raise RuntimeError("There is an infinite loop in the precision inference.") + return False for layer_type in node.types.values(): if isinstance(layer_type.precision, UnspecifiedPrecisionType): return True @@ -30,7 +40,7 @@ def transform(self, model, node): if type_name not in inferred_types: self._infer_default_type(node, type_name) - return False # No model graph changes made + return True # May need to rerun def _infer_precision(self, node, types_to_infer): node_class = node.class_name diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 78a808b9a1..1b8e3d9686 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -15,6 +15,9 @@ def transform(self, model, node): return True +_safe_parents = 
(Dense, Conv1D, Conv2D, BatchNormalization, Activation) + + class MergeLinearActivation(OptimizerPass): ''' For many objects it's safe to change the output precision independently of the calculation. @@ -26,7 +29,7 @@ def match(self, node): ''' if isinstance(node, Activation) and node.get_attr('activation') == 'linear': parent = node.get_input_node(node.inputs[0]) - safe_parent = isinstance(parent, (Dense, Conv1D, Conv2D, BatchNormalization)) + safe_parent = isinstance(parent, _safe_parents) return safe_parent and isinstance(parent.get_output_variable().type.precision, UnspecifiedPrecisionType) else: return False @@ -35,6 +38,7 @@ def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) quantizer = node.get_attr("quantizer") prev_node.set_attr("quantizer", quantizer) - prev_node.update_output_precision(quantizer.hls_type) + prev_node.types['result_t'] = quantizer.hls_type + prev_node.get_output_variable().type.precision = quantizer.hls_type model.remove_node(node) return True diff --git a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py index 889a376cee..4c48944eb3 100644 --- a/hls4ml/model/optimizer/passes/matmul_const_to_dense.py +++ b/hls4ml/model/optimizer/passes/matmul_const_to_dense.py @@ -2,10 +2,6 @@ from hls4ml.model.layers import Constant, Dense, MatMul from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.quantizers import QuantNodeQuantizer -from hls4ml.model.types import IntegerPrecisionType - -_base_attributes = ('Trace', 'reuse_factor', 'weight', 'weight_t', 'bias', 'bias_t') class MatmulConstToDense(OptimizerPass): @@ -30,27 +26,29 @@ def transform(self, model, node): weight_data = const_node.attributes['value'] weight_quantizer = const_node.get_attr('quantizer') + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'Dense_{node.name}' + model.config.set_name_config(new_name, config) + 
model.config.parse_name_config(new_name, config) + in_shape = other_var.shape n_in = np.prod(in_shape) out_shape = list(in_shape[:-1]) + [weight_data.shape[-1]] n_out = np.prod(out_shape) # creating the attributes - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'weight_data': weight_data, - 'weight_quantizer': weight_quantizer, - 'bias_data': np.zeros(out_shape), - 'bias_quantizer': QuantNodeQuantizer(IntegerPrecisionType(1, False)), - 'have_bias': False, - 'n_in': n_in, - 'n_out': n_out, - } - ) + attributes = { + 'weight_data': weight_data, + 'weight_quantizer': weight_quantizer, + 'bias_data': np.zeros(out_shape), + 'use_bias': False, + 'n_in': n_in, + 'n_out': n_out, + } # making new node - new_dense = model.make_node(Dense, f'Dense_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) + new_dense = model.make_node(Dense, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) # removing and replacing old nodes model.remove_node(const_node, rewire=False) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 54f275d9ec..78591d203c 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -5,8 +5,6 @@ from hls4ml.model.quantizers import QuantNodeQuantizer from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType -_base_attributes = ('Trace', 'reuse_factor', 'n_in') - # This should generally not happen because of qonnx cleaning class MergeTwoConstants(OptimizerPass): @@ -56,12 +54,10 @@ def transform(self, model, node): const_node0.set_attr('quantizer', quantizer) # overwrite the quantizer if quantizer: const_node0.set_attr('quantizer', quantizer) - + const_node0.types['result_t'] = quantizer.hls_type + const_node0.get_output_variable().type.precision = quantizer.hls_type const_node0.set_attr('value', new_val) - # reinitialize (which also runs quantization if 
quantizer exists) - const_node0.initialize() - model.remove_node(const_node1, rewire=False) # remove the batch norm node @@ -151,23 +147,26 @@ def transform(self, model, node): if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): bias = np.broadcast_to(bias, input_shape) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'scale_data': scale, - 'bias_data': bias, - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1, - 'scale_precision': scale_precision, - 'scale_quantizer': scale_quantizer, - 'bias_precision': bias_precision, - 'bias_quantizer': bias_quantizer, - } - ) + attributes = { + 'scale_data': scale, + 'bias_data': bias, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + 'scale_precision': scale_precision, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'bias_quantizer': bias_quantizer, + } + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'bn_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) aa_layer = model.make_node( - ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] + ApplyAlpha, new_name, attributes, [node.inputs[input_node_idx]], [x for x in node.outputs] ) model.remove_node(const_node, rewire=False) @@ -222,20 +221,23 @@ def transform(self, model, node): if bias.shape != tuple(input_shape) and np.squeeze(bias).shape != tuple(input_shape): bias = np.broadcast_to(bias, input_shape) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update( - { - 'scale_data': scale, - 'bias_data': bias, - 'scale_quantizer': scale_quantizer, - 'bias_precision': bias_precision, - 'n_in': n_in, - 'n_out': n_in, - 'n_filt': -1, - } - ) - - bn_layer = model.make_node(ApplyAlpha, f'bn_{node.name}', attributes, [node.inputs[0]], [x for x in node.outputs]) + attributes = { + 
'scale_data': scale, + 'bias_data': bias, + 'scale_quantizer': scale_quantizer, + 'bias_precision': bias_precision, + 'n_in': n_in, + 'n_out': n_in, + 'n_filt': -1, + } + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'bn_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) + + bn_layer = model.make_node(ApplyAlpha, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.remove_node(const_node, rewire=False) del node.inputs[1] diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 0d02124bc6..ed7f9701a2 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -14,6 +14,7 @@ """ +import copy import math # prefer to use math.ceil for scalar values import numpy as np @@ -25,8 +26,6 @@ _ALSO_MATCH_PO2 = True -_base_attributes = ('Trace', 'reuse_factor') - class QuantConstantParameters(OptimizerPass): """Remove Constant from the Qaunt node parameters (but not input[0])""" @@ -131,11 +130,17 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quantizer': quantizer}) + attributes = {'activation': 'linear', 'quantizer': quantizer} + + # update the configuration + config = model.config.get_layer_config(node) + prec_config = config.setdefault('Precision', {}) + prec_config['result'] = str(precision) + new_name = f'{node.name}_act' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) - new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) - new_node.get_output_variable().type.precision = precision + new_node = model.make_node(Activation, 
new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) return True @@ -189,8 +194,11 @@ def transform(self, model, node): const_node = node.get_input_node(node.inputs[0]) const_node.set_attr('quantizer', quantizer) + const_node.set_attr('result_t', precision) const_node.get_output_variable().type.precision = precision + # Should we update the configuration to reflect the new precision? I don't think it's necessary + # remove the Quant node model.remove_node(node, rewire=True) @@ -228,11 +236,18 @@ def transform(self, model, node): precision, quantizer = _calculate_precision_quantizer(bitwidth, bitwidth, signed, narrow, rounding_mode) - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} - attributes.update({'activation': 'linear', 'quantizer': quantizer}) + activation_attributes = {'activation': 'linear', 'quantizer': quantizer} + + # update the configuration + config = model.config.get_layer_config(node) + act_config = copy.deepcopy(config) + prec_config = act_config.setdefault('Precision', {}) + prec_config['result'] = str(precision) + act_name = f'{node.name}_act' + model.config.set_name_config(act_name, act_config) + model.config.parse_name_config(act_name, act_config) - new_node = model.make_node(Activation, f'{node.name}_act', attributes, [node.inputs[0]], [x for x in node.outputs]) - new_node.get_output_variable().type.precision = precision + new_node = model.make_node(Activation, act_name, activation_attributes, [node.inputs[0]], [x for x in node.outputs]) model.replace_node(node, new_node) # but now add the ApplyAlhpas before and after @@ -240,16 +255,25 @@ def transform(self, model, node): scale = node.get_attr('scale') bias = node.get_attr('zeropt') - attributes_scale = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes_scale = {} + attributes_rescale = {} - attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + scale_config = 
copy.deepcopy(config) + scale_name = f'{node.name}_scale' + model.config.set_name_config(scale_name, scale_config) + model.config.parse_name_config(scale_name, scale_config) + + rescale_config = config # no need to deep copy the last + rescale_name = f'{node.name}_rescale' + model.config.set_name_config(rescale_name, rescale_config) + model.config.parse_name_config(rescale_name, rescale_config) firstscale = 1 / scale firstbias = bias attributes_scale['scale_data'] = firstscale attributes_scale['bias_data'] = firstbias - scale_node = model.make_node(ApplyAlpha, node.name + '_scale', attributes_scale, [node.inputs[0]]) + scale_node = model.make_node(ApplyAlpha, scale_name, attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale @@ -257,7 +281,7 @@ def transform(self, model, node): attributes_rescale['scale_data'] = rescale attributes_rescale['bias_data'] = rebias - rescale_node = model.make_node(ApplyAlpha, node.name + '_rescale', attributes_rescale, [new_node.outputs[0]]) + rescale_node = model.make_node(ApplyAlpha, rescale_name, attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) return True @@ -305,10 +329,15 @@ def transform(self, model, node): const_node.set_attr('value', new_val) const_node.set_attr('quantizer', quantizer) - # reinitialize (which also runs quantization if quantizer exists) - const_node.initialize() + const_node.types['result_t'].precision = precision + const_node.get_output_variable().type.precision = precision + + attributes_rescale = {} - attributes_rescale = {k: node.attributes.get(k, None) for k in _base_attributes} + rescale_config = copy.deepcopy(model.config.get_layer_config(node)) + rescale_name = f'{node.name}_rescale' + model.config.set_name_config(rescale_name, rescale_config) + model.config.parse_name_config(rescale_name, rescale_config) rescale = scale rebias = -bias * scale @@ -316,7 +345,7 @@ def transform(self, model, node): attributes_rescale['bias_data'] = rebias rescale_node = 
model.make_node( - ApplyAlpha, node.name + '_rescale', attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] + ApplyAlpha, rescale_name, attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] ) model.replace_node(node, rescale_node) diff --git a/hls4ml/model/types.py b/hls4ml/model/types.py index 05617ba124..9fb257a1ef 100644 --- a/hls4ml/model/types.py +++ b/hls4ml/model/types.py @@ -115,26 +115,6 @@ def saturation_mode(self): def saturation_bits(self): return 0 - @property - def integer(self): - return self.width - - @property - def fractional(self): - return 0 - - @property - def rounding_mode(self): - return RoundingMode.TRN - - @property - def saturation_mode(self): - return SaturationMode.WRAP - - @property - def saturation_bits(self): - return None - class FixedPrecisionType(PrecisionType): """Arbitrary precision fixed-point data type. @@ -159,10 +139,6 @@ def __init__(self, width=16, integer=6, signed=True, rounding_mode=None, saturat # make this a property to avoid inconsistencies - @property - def fractional(self): - return self.width - self.integer - @property def fractional(self): return self.width - self.integer @@ -231,6 +207,7 @@ def __init__(self): super().__init__(width=1, signed=False) self.integer = 1 + # TODO: this should really be a specific type def __str__(self): typestring = 'uint<1>' return typestring @@ -245,6 +222,7 @@ class ExponentPrecisionType(PrecisionType): def __init__(self, width=16, signed=True): super().__init__(width=width, signed=signed) + # TODO: this should really be a specific type, not int def __str__(self): typestring = '{signed}int<{width}>'.format(signed='u' if not self.signed else '', width=self.width) return typestring From 25849ef435731679961b5ba7068abbede1d9f02a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 15:30:36 -0500 Subject: [PATCH 25/62] Fix bug in FuseBatchNormalization --- hls4ml/model/optimizer/passes/bn_fuse.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index c84430f13f..b3e8e454c8 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -89,8 +89,8 @@ def transform(self, model, node): node.set_attr('bias_quantizer', b_quantizer) # call function so that quantizer would be called if needed - node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) - node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) + parent_node.add_weights_variable(name='weight', var_name='w{index}', data=fused_weight, quantizer=w_quantizer) + parent_node.add_weights_variable(name='bias', var_name='b{index}', data=fused_bias, quantizer=b_quantizer) model.remove_node(node, rewire=True) From 4485bf3154ed5f4fbdabea1888b122ba84d2df80 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 20:23:58 -0500 Subject: [PATCH 26/62] fix issue with configuration setup of test --- test/pytest/test_qonnx.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 529a5adebc..426df8f2e0 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -93,10 +93,7 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, backend=backend) - # Some hand-derived config - config['LayerName'] = {} - config['LayerName']['global_in'] = {'Precision': 'ap_fixed<16,2>'} + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_tfc-2w2a_{backend}'), backend=backend, hls_config=config ) From 
52067c32e5de07af94322815b3fd02db5f8a5efa Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 10 Mar 2024 22:25:15 -0500 Subject: [PATCH 27/62] fix bug in FuseConsecutiveBatchNormalization --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 1800b33056..26292d7e2a 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -197,10 +197,10 @@ def transform(self, model, node): # only merge if the types are integer or fixed if ( - not isinstance(prev_node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(prev_node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['scale'].type, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['bias'].type, (IntegerPrecisionType, FixedPrecisionType)) + not isinstance(prev_node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(prev_node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) + or not isinstance(node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) ): return False From 24d6245660d2b601301dc800e4401a5098b39c2a Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Mar 2024 18:54:41 -0500 Subject: [PATCH 28/62] add missing header --- .../quartus/firmware/nnet_utils/nnet_conv2d_resource.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h index 73ad45592f..f5ce781739 100644 --- a/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h 
+++ b/hls4ml/templates/quartus/firmware/nnet_utils/nnet_conv2d_resource.h @@ -1,6 +1,8 @@ #ifndef NNET_CONV2D_RESOURCE_H_ #define NNET_CONV2D_RESOURCE_H_ +#include + #include "nnet_common.h" #include "nnet_dense.h" #include "nnet_helpers.h" From 835af4e0a2c1ce403c74342f873fe727f01d99c0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 11 Mar 2024 18:55:41 -0500 Subject: [PATCH 29/62] attempt to make qonnx tests match better --- test/pytest/test_qonnx.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 426df8f2e0..b955608b88 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -88,7 +88,9 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model ishape = (1, 1, 28, 28) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] @@ -105,15 +107,20 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) def test_cnv_2w2a(cnv_2w2a_model, backend): + """ + This tests a convolution model. Note: the batch normalizations weights not quantized, so it + is difficult to make this match perfectly. It is also a slow test. 
+ """ model = cnv_2w2a_model ishape = (1, 32, 32, 3) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**6) * 2**-6).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,6>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_cnv-2w2a_{backend}'), @@ -134,12 +141,13 @@ def test_jet_tagging(jettagging_model, backend): # Execute QONNX model inference # TODO make the test bigger ishape = (1, 16) - X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape).astype(np.float32) + X = np.random.uniform(low=-1, high=+1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] # Convert QONNX model, compile, and run inference - config = hls4ml.utils.config_from_onnx_model(model, backend=backend) + config = hls4ml.utils.config_from_onnx_model(model, backend=backend, default_precision='fixed<32,16>') hls_model = hls4ml.converters.convert_from_onnx_model( model, output_dir=str(test_root_path / f'hls4mlprj_qonnx_jettag_{backend}'), backend=backend, hls_config=config From 2bcec04b12ad10dcb689536cada563caafea5faf Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 12 Mar 2024 09:08:35 -0500 Subject: [PATCH 30/62] fix pre-commit --- hls4ml/model/optimizer/passes/move_scales.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hls4ml/model/optimizer/passes/move_scales.py 
b/hls4ml/model/optimizer/passes/move_scales.py index fe1acb7f94..cec69af5e8 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -4,6 +4,7 @@ TODO: Check that biases are properly handled. (Attempt to do it via Merge) ''' + import numpy as np from hls4ml.model.layers import ApplyAlpha, Constant, Conv, MatMul, Merge From b3facd25975ac61b02270d04b60efb1fe3e455de Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Apr 2024 09:59:54 -0500 Subject: [PATCH 31/62] remove count, become more selective on when True is returned --- hls4ml/model/optimizer/passes/infer_precision.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 4de58a18c2..ee585c42d6 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -12,17 +12,11 @@ class InferPrecisionTypes(ConfigurableOptimizerPass): def __init__(self): # The option, infer_no_bias, allows you to tailor for the given weights, in particular, zero bias self.infer_no_bias = False - self.count = 0 - self.MAX_COUNT = 1000 def match(self, node): input_var = node.get_input_variable() if input_var is not None and isinstance(input_var.type, UnspecifiedPrecisionType): - # need to wait for the input to update - # but check for infinite loops - self.count += 1 - if self.count == self.MAX_COUNT: - raise RuntimeError("There is an infinite loop in the precision inference.") + # only infer types if the input type is known return False for layer_type in node.types.values(): if isinstance(layer_type.precision, UnspecifiedPrecisionType): @@ -40,7 +34,9 @@ def transform(self, model, node): if type_name not in inferred_types: self._infer_default_type(node, type_name) - return True # May need to rerun + # if the return type was set, this may allow InferPrecisionTypes to be run + # on layers it 
was not previously able to + return 'result_t' in types_to_infer def _infer_precision(self, node, types_to_infer): node_class = node.class_name From 0d8108eaeacc504a213a3795b1d1482cf621c4cf Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 19 Apr 2024 14:59:03 -0500 Subject: [PATCH 32/62] fix optimizer issue when quantizer is None --- hls4ml/model/optimizer/passes/linear.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hls4ml/model/optimizer/passes/linear.py b/hls4ml/model/optimizer/passes/linear.py index 1b8e3d9686..b1aee7adc7 100644 --- a/hls4ml/model/optimizer/passes/linear.py +++ b/hls4ml/model/optimizer/passes/linear.py @@ -37,8 +37,10 @@ def match(self, node): def transform(self, model, node): prev_node = node.get_input_node(node.inputs[0]) quantizer = node.get_attr("quantizer") - prev_node.set_attr("quantizer", quantizer) - prev_node.types['result_t'] = quantizer.hls_type - prev_node.get_output_variable().type.precision = quantizer.hls_type + # if the activation has a quantizer (usually from a QONNX Quant node), set the previous node's output precision + if quantizer is not None: + prev_node.set_attr("quantizer", quantizer) + prev_node.types['result_t'] = quantizer.hls_type + prev_node.get_output_variable().type.precision = quantizer.hls_type model.remove_node(node) return True From 1fa59dcd947c99851a5c5bce4301e3ef52407bdc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 16 May 2024 11:32:10 -0500 Subject: [PATCH 33/62] update pytest image to 0.5.6 --- test/pytest/ci-template.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pytest/ci-template.yml b/test/pytest/ci-template.yml index afaf90da4d..f6aa700415 100644 --- a/test/pytest/ci-template.yml +++ b/test/pytest/ci-template.yml @@ -1,6 +1,6 @@ .pytest: stage: test - image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.5.5.base + image: gitlab-registry.cern.ch/fastmachinelearning/hls4ml-testing:0.5.6.base tags: - k8s-default 
before_script: From c5841a2d1754bc1b179b9a70f8bdd7463fd14f1b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 25 Jun 2024 18:03:12 -0500 Subject: [PATCH 34/62] seperate out parse_qonnx flow --- hls4ml/model/optimizer/__init__.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 712dc3822c..eb53ed7925 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -31,7 +31,7 @@ del optimizers register_flow( - 'convert', + 'parse_qonnx', [ 'reshape_constant', 'quant_constant_parameters', @@ -51,9 +51,16 @@ 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', - 'fuse_consecutive_batch_normalization', # needs to be before infer_precision_types - 'merge_linear_activation', # needs to be before infer_precision_types - 'fuse_batch_normalization', # needs to be before infer_precision_types + ], +) + +register_flow( + 'convert', + [ + 'fuse_consecutive_batch_normalization', + 'merge_linear_activation', + 'fuse_batch_normalization', + # The ones above here need to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', 'remove_transpose_before_flatten', @@ -65,6 +72,7 @@ 'qkeras_factorize_alpha', 'extract_ternary_threshold', ], + requires=['parse_qonnx'], ) # TODO Maybe not all QKeras optmizers belong here? 
register_flow( From de790ca3f889d777a19da3f802a2708e8cc53788 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 26 Jun 2024 14:01:46 -0500 Subject: [PATCH 35/62] Again allow for None in target shape--for pytorch --- hls4ml/model/layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 0db82ff411..d40d0f04b1 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -402,9 +402,9 @@ def initialize(self): else: raise RuntimeError("Reshape for ONNX requires the target shape to be a second input.") - # nones should not exist here + # remove Nones -- Seems to be used by pytorch parser if target_shape[0] is None: - raise RuntimeError(f"Unexpectedly have a None in {target_shape=}") + target_shape = target_shape[1:] # take care of -1 shapes shape = self._infer_output_shape(input_shape, target_shape) From 2909d154dd49f81f0ada629f2e9bd45786a24ebf Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 14:14:57 -0500 Subject: [PATCH 36/62] Following what seems to be done in the main branch --- hls4ml/model/optimizer/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index eb53ed7925..282561e11e 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -60,6 +60,7 @@ 'fuse_consecutive_batch_normalization', 'merge_linear_activation', 'fuse_batch_normalization', + 'eliminate_linear_activation', # The ones above here need to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', From c9693da8106ee9bf34ce52c9003684d825a7d0e7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 19 Jul 2024 10:18:41 -0500 Subject: [PATCH 37/62] update infer_precision based on changes in keras-config-auto --- .../model/optimizer/passes/infer_precision.py | 298 +++++++++++++----- 1 file changed, 211 insertions(+), 87 deletions(-) diff --git 
a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 51422c534e..5c1801156f 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -1,9 +1,10 @@ import math +from typing import Iterable import numpy as np from hls4ml.model.optimizer import ConfigurableOptimizerPass -from hls4ml.model.types import FixedPrecisionType, UnspecifiedPrecisionType +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, PrecisionType, UnspecifiedPrecisionType # TODO: The code assumes everything is Fixed or Integer precision. Need to add checks @@ -67,6 +68,12 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Dot']: return self._infer_dot_precision(node, types_to_infer) + if node_class in ['Embedding']: + return self._infer_embedding_precision(node, types_to_infer) + + if node_class in ['SimpleRNN', 'LSTM', 'GRU']: + return self._infer_rnn_precision(node, types_to_infer) + # What about quantized activation layer? Setting it to 'auto' manually will break it here. 
We should prevent # this in config_from_* functions @@ -76,6 +83,20 @@ def _get_default_precision(self, node): model_config = node.model.config return model_config.backend.convert_precision_string(model_config.model_precision['default']) + def _get_maximum_precision(self, node): + model_config = node.model.config + if 'maximum' in model_config.model_precision: + return model_config.backend.convert_precision_string(model_config.model_precision['maximum']) + else: + return None + + def _all_supported_types(self, types: Iterable[PrecisionType]): + """Are all the types supported for inference--currently Integer or Fixed""" + for tp in types: + if not isinstance(tp, (IntegerPrecisionType, FixedPrecisionType)): + return False + return True + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -96,9 +117,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): inferred_types = [] input_precision = node.get_input_variable().type.precision - input_width = input_precision.width - input_integers = input_precision.integer - input_signed = input_precision.signed if 'weight_t' in types_to_infer: weight_quantizer = node.get_attr('weight_quantizer', None) @@ -110,10 +128,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['weight'].update_precision(node.types['weight_t'].precision) inferred_types.append('weight_t') - weight_width = node.types['weight_t'].precision.width - weight_integers = node.types['weight_t'].precision.integer - weight_signed = node.types['weight_t'].precision.signed - if 'bias_t' in types_to_infer: bias_quantizer = node.get_attr('bias_quantizer', None) if bias_quantizer is not None: @@ -124,25 +138,42 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['bias'].update_precision(node.types['bias_t'].precision) inferred_types.append('bias_t') - bias_width = 
node.types['bias_t'].precision.width - bias_integers = node.types['bias_t'].precision.integer - bias_signed = node.types['bias_t'].precision.signed - no_bias = node.weights['bias'].nonzeros == 0 and self.infer_no_bias # no bias + if self._all_supported_types((input_precision, node.types['weight_t'].precision, node.types['bias_t'].precision)): + input_width = input_precision.width + input_integers = input_precision.integer + input_signed = input_precision.signed - # using math.ceil instead of np.ceil because it returns an int - bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) - integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) - signed = weight_signed or input_signed + weight_width = node.types['weight_t'].precision.width + weight_integers = node.types['weight_t'].precision.integer + weight_signed = node.types['weight_t'].precision.signed - frac = bitwidth - integers + bias_width = node.types['bias_t'].precision.width + bias_integers = node.types['bias_t'].precision.integer + bias_signed = node.types['bias_t'].precision.signed + no_bias = node.weights['bias'].nonzeros == 0 and self.infer_no_bias # no bias + + # using math.ceil instead of np.ceil because it returns an int + bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) + integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) + signed = weight_signed or input_signed + + frac = bitwidth - integers - if not no_bias: - integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 - bitwidth = integers + max(frac, bias_width - bias_integers) - signed = signed or bias_signed + if not no_bias: + integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 + bitwidth = integers + max(frac, bias_width - bias_integers) + signed = signed or bias_signed - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. 
- new_type = FixedPrecisionType(bitwidth, integers, signed) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + bitwidth = min(bitwidth, max_precision.width) + integers = min(integers, max_precision.integer) + + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. + new_type = FixedPrecisionType(bitwidth, integers, signed) + else: + new_type = self._get_default_precision(node) if 'accum_t' in types_to_infer: node.types['accum_t'].name = node.name + '_accum_t' @@ -166,6 +197,7 @@ def _infer_conv_precision(self, node, types_to_infer): n_ops = node.get_attr('n_chan') * node.get_attr('filt_height', 1) * node.get_attr('filt_width') return self._infer_common_precision(node, types_to_infer, n_ops) + # This function is ignored because we will split sepconv in the future def _infer_sepconv_precision(self, node, types_to_infer): inferred_types = [] @@ -265,24 +297,35 @@ def _infer_bn_precision(self, node, types_to_infer): scale_precision = node.types['scale_t'].precision bias_precision = node.types['bias_t'].precision - after_scale_signed = scale_precision.signed or input_precision.signed - after_scale_width = input_precision.width + scale_precision.width - after_scale_integer = input_precision.integer + scale_precision.integer + if self._all_supported_types((input_precision, scale_precision, bias_precision)): + + after_scale_signed = scale_precision.signed or input_precision.signed + after_scale_width = input_precision.width + scale_precision.width + after_scale_integer = input_precision.integer + scale_precision.integer - out_precision_signed = after_scale_signed or bias_precision.signed - out_precision_integer = ( - max( - after_scale_integer + (bias_precision.signed and not after_scale_signed), - bias_precision.integer + (after_scale_signed and not bias_precision.signed), + out_precision_signed = 
after_scale_signed or bias_precision.signed + out_precision_integer = ( + max( + after_scale_integer + (bias_precision.signed and not after_scale_signed), + bias_precision.integer + (after_scale_signed and not bias_precision.signed), + ) + + 1 + ) + out_precision_width = out_precision_integer + max( + after_scale_width - after_scale_integer, bias_precision.fractional ) - + 1 - ) - out_precision_width = out_precision_integer + max( - after_scale_width - after_scale_integer, bias_precision.fractional - ) - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. - out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + out_precision_width = min(out_precision_width, max_precision.width) + out_precision_integer = min(out_precision_integer, max_precision.integer) + + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. + out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + + else: + out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -298,20 +341,29 @@ def _infer_pooling_precision(self, node, types_to_infer): input_precision = node.get_input_variable().type.precision pool_op = node.attributes['pool_op'].lower() - width = input_precision.width - integer = input_precision.integer - signed = input_precision.signed + if pool_op == 'max': + # This has the benefit of working for xnor types. 
I don't think "copy" is needed + accum_type = input_precision + + elif pool_op == 'average': + if self._all_supported_types((input_precision,)): + width = input_precision.width + integer = input_precision.integer + signed = input_precision.signed + + pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') + extra_bits = int(np.ceil(np.log2(pool_size))) + + # for now ignore max precision in this case + accum_type = FixedPrecisionType( + width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed + ) + else: + accum_type = self._get_default_precision(node) - pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') - if pool_op == 'average': - extra_bits = int(np.ceil(np.log2(pool_size))) - elif pool_op == 'max': - extra_bits = 0 else: raise ValueError(f'Unknown pooling operation: {pool_op}') - accum_type = FixedPrecisionType(width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed) - node.types['accum_t'].name = node.name + '_accum_t' node.types['accum_t'].precision = accum_type @@ -331,22 +383,76 @@ def _infer_merge_precision(self, node, types_to_infer): op = node.get_attr('op').lower() if op in ('add', 'subtract', 'average'): - new_signed = input_1.signed or input_2.signed or op == 'subtract' - new_int = ( - max( - input_1.integer + (input_2.signed and not input_1.signed), - input_2.integer + (input_1.signed and not input_2.signed), + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed or op == 'subtract' + new_int = ( + max( + input_1.integer + (input_2.signed and not input_1.signed), + input_2.integer + (input_1.signed and not input_2.signed), + ) + + 1 ) - + 1 - ) - new_width = new_int + max(input_1.fractional, input_2.fractional) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + new_width = new_int + max(input_1.fractional, input_2.fractional) + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + 
new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op == 'multiply': - new_signed = input_1.signed or input_2.signed - new_int = input_1.integer + input_2.integer - new_width = input_1.width + input_2.width - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + new_int = input_1.integer + input_2.integer + new_width = input_1.width + input_2.width + # if max_precision is specified, limit the size to be less than max precision + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op in ('maximum', 'minimum'): + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) + else: + print(f'Warning: not propagating weights for type {op}') + out_precision = self._get_default_precision(node) + + node.types['result_t'].name =
node.name + '_result_t' + node.types['result_t'].precision = out_precision + + return ['result_t'] + + def _infer_cat_precision(self, node, types_to_infer): + assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + + input_1 = node.get_input_variable(node.inputs[0]).type.precision + input_2 = node.get_input_variable(node.inputs[1]).type.precision + + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): new_signed = input_1.signed or input_2.signed input_1_integer = input_1.integer @@ -360,9 +466,15 @@ def _infer_merge_precision(self, node, types_to_infer): new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) new_int = max(input_1_integer, input_2_integer) + + # if max_precision is specified, limit the size to be less than max precision + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: - print(f'Warning: not propagating weights for type {op}') out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' @@ -370,46 +482,58 @@ def _infer_merge_precision(self, node, types_to_infer): return ['result_t'] - def _infer_cat_precision(self, node, types_to_infer): + def _infer_dot_precision(self, node, types_to_infer): assert 'result_t' in types_to_infer and len(types_to_infer) == 1 input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - new_signed = input_1.signed or input_2.signed + if self._all_supported_types((input_1, input_2)): + n_in = node.get_input_variable(node.inputs[0]).shape[0] - input_1_integer = input_1.integer - input_2_integer = input_2.integer - - # add one to
integer if unsigned while new is signed - if new_signed and not input_1.signed: - input_1_integer += 1 - if new_signed and not input_2.signed: - input_2_integer += 1 + new_signed = input_1.signed or input_2.signed + new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) + new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) - new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) - new_int = max(input_1_integer, input_2_integer) + # if max_precision is specified, limit the size to be less than max precision + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision return ['result_t'] - def _infer_dot_precision(self, node, types_to_infer): - assert 'result_t' in types_to_infer and len(types_to_infer) == 1 + def _infer_embedding_precision(self, node, types_to_infer): + inferred_types = [] - input_1 = node.get_input_variable(node.inputs[0]).type.precision - input_2 = node.get_input_variable(node.inputs[1]).type.precision + if 'embeddings_t' in types_to_infer: + self._infer_default_type(node, 'embeddings_t') + node.weights['embeddings'].update_precision(node.types['embeddings_t'].precision) + inferred_types.append('embeddings_t') + + if 'result_t' in types_to_infer: + out_precision = self._get_default_precision(node) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + inferred_types.append('result_t') - n_in = node.get_input_variable(node.inputs[0]).shape[0] + return inferred_types - new_signed = input_1.signed or
input_2.signed - new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) - new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) + # TODO: This is just a placeholder + def _infer_rnn_precision(self, node, types_to_infer): + inferred_types = [] - out_precision = FixedPrecisionType(new_width, new_int, new_signed) - node.types['result_t'].name = node.name + '_result_t' - node.types['result_t'].precision = out_precision + # for now just do the weights and leave the rest for the default catch + for weightvar in ('weight', 'bias', 'recurrent_weight', 'recurrent_bias'): + if f'{weightvar}_t' in types_to_infer: + self._infer_default_type(node, f'{weightvar}_t') + node.weights[weightvar].update_precision(node.types[f'{weightvar}_t'].precision) + inferred_types.append(f'{weightvar}_t') - return ['result_t'] + return inferred_types From aaaa2fcfe01a8aed2efb09707f2eb423366dac1e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Fri, 19 Jul 2024 10:20:26 -0500 Subject: [PATCH 38/62] loosen batchnorm merging restrictions, fix ternary handling --- hls4ml/model/optimizer/__init__.py | 4 +-- .../model/optimizer/passes/batchnorm_opt.py | 32 +++++++++---------- hls4ml/model/optimizer/passes/qkeras.py | 10 +++++- test/pytest/test_qkeras.py | 4 ++- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 282561e11e..c6270d8f28 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -61,6 +61,8 @@ 'merge_linear_activation', 'fuse_batch_normalization', 'eliminate_linear_activation', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', # The ones above here need to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', @@ -70,8 +72,6 @@ 'fuse_bias_add', 'expand_layer_group', 'output_rounding_saturation_mode', - 'qkeras_factorize_alpha', - 'extract_ternary_threshold', ], requires=['parse_qonnx'], ) # TODO 
Maybe not all QKeras optmizers belong here? diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 26292d7e2a..94a9a32d70 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -170,12 +170,12 @@ def match(self, node): s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized scale_compatible = ( - (prev_node.get_attr('scale_quantizer') is None and node.get_attr('scale_quantizer') is None) + (prev_node.get_attr('scale_quantizer') is None or node.get_attr('scale_quantizer') is None) or (s0 == np.ones_like(s0)).all() or (s1 == np.ones_like(s1)).all() ) bias_compatible = ( - (prev_node.get_attr('bias_quantizer') is None and node.get_attr('bias_quantizer') is None) + (prev_node.get_attr('bias_quantizer') is None or node.get_attr('bias_quantizer') is None) or (b0 == np.zeros_like(b0)).all() or (b1 == np.zeros_like(b1)).all() ) @@ -195,26 +195,24 @@ def transform(self, model, node): # if len(node_map[node.outputs[0]]) > 1: # return False - # only merge if the types are integer or fixed - if ( - not isinstance(prev_node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(prev_node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['scale'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - or not isinstance(node.weights['bias'].type.precision, (IntegerPrecisionType, FixedPrecisionType)) - ): - return False - s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized b1 = node.weights['bias'].data_unquantized - s_quantizer = ( - node.get_attr('scale_quantizer') if (s0 == np.ones_like(s0)).all() else prev_node.get_attr('scale_quantizer') - ) - b_quantizer = ( - node.get_attr('bias_quantizer') if (b0 == np.zeros_like(b0)).all() else 
prev_node.get_attr('bias_quantizer') - ) + if (s0 == np.ones_like(s0)).all(): + s_quantizer = node.get_attr('scale_quantizer') + elif (s1 == np.ones_like(s1)).all(): + s_quantizer = prev_node.get_attr('scale_quantizer') + else: + s_quantizer = None + + if (b0 == np.ones_like(b0)).all(): + b_quantizer = node.get_attr('bias_quantizer') + elif (b1 == np.ones_like(b1)).all(): + b_quantizer = prev_node.get_attr('bias_quantizer') + else: + b_quantizer = None node.set_attr('scale_quantizer', s_quantizer) node.set_attr('bias_quantizer', b_quantizer) diff --git a/hls4ml/model/optimizer/passes/qkeras.py b/hls4ml/model/optimizer/passes/qkeras.py index a97438832d..03690bed0d 100644 --- a/hls4ml/model/optimizer/passes/qkeras.py +++ b/hls4ml/model/optimizer/passes/qkeras.py @@ -163,8 +163,16 @@ def transform(self, model, node): else: n_in = node.get_attr('n_out') + # the name of the new ApplyAlpha node + alpha_name = node.get_attr('name') + '_alpha' + + # make the precision auto + alpha_precision = {'Precision': 'auto'} + model.config.set_name_config(alpha_name, alpha_precision) + model.config.parse_name_config(alpha_name, alpha_precision) + attrs = { - 'name': node.get_attr('name') + '_alpha', + 'name': alpha_name, 'class_name': 'Alpha', 'inputs': node.outputs, 'n_in': n_in, diff --git a/test/pytest/test_qkeras.py b/test/pytest/test_qkeras.py index 45d015807b..5f62475d1a 100644 --- a/test/pytest/test_qkeras.py +++ b/test/pytest/test_qkeras.py @@ -356,8 +356,10 @@ def test_relu_negative_slope(randX_1000_1, quantizer, backend, io_type): ], ) def test_qactivation_kwarg(randX_100_10, activation_quantizer, weight_quantizer): - if activation_quantizer in ['binary', 'ternary']: + if activation_quantizer in ['binary']: name = 'bnbt_qdense_alpha' + elif activation_quantizer in ['ternary']: + name = 'bnbt_qdense_ternary_scale' else: name = f'qdense_{eval(activation_quantizer).__class__.__name__}' From a2b88f4a1a9f6c4ddb06bdf50c5e5e8d21dd0eb4 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski 
Date: Fri, 19 Jul 2024 17:24:52 -0500 Subject: [PATCH 39/62] remove some backends from slow qonnx test --- test/pytest/test_qonnx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index b955608b88..5b7b9d95c9 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -105,11 +105,11 @@ def test_tfc_2w2a(tfc_2w2a_model, backend): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) -@pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) +@pytest.mark.parametrize('backend', ['Vitis']) def test_cnv_2w2a(cnv_2w2a_model, backend): """ - This tests a convolution model. Note: the batch normalizations weights not quantized, so it - is difficult to make this match perfectly. It is also a slow test. + This tests a convolution model. Note: the batch normalizations weights not quantized, so it is + difficult to make this match perfectly. It is also a slow test, which is why only Vitis is tested. 
""" model = cnv_2w2a_model From ef02b4f4a45ae4c032d8ea49fc9854e8d4de7bc7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 21 Aug 2024 11:15:05 -0500 Subject: [PATCH 40/62] move multi_dense to conv above inferming precision types --- hls4ml/model/optimizer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index e311eb96cf..64be9903ad 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -63,6 +63,7 @@ 'eliminate_linear_activation', 'qkeras_factorize_alpha', 'extract_ternary_threshold', + 'replace_multidimensional_dense_with_conv', 'seperable_to_depthwise_and_conv', # The ones above here need to be before infer_precision_types 'infer_precision_types', @@ -74,7 +75,6 @@ 'expand_layer_group', 'output_rounding_saturation_mode', 'fuse_consecutive_batch_normalization', - 'replace_multidimensional_dense_with_conv', 'enforce_proxy_model_embedded_config', ], requires=['parse_qonnx'], From c3ffa7bf5fde0c54b4d514ff2a18c5c1228e9549 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 21 Aug 2024 11:38:40 -0500 Subject: [PATCH 41/62] fix the default reuse factor --- hls4ml/utils/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 51e87244e4..78f033c28c 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -401,6 +401,8 @@ def make_layer_config(layer): precision_cfg[name] = 'auto' else: precision_cfg[name] = str(attr.default) + elif attr.name == 'reuse_factor': + layer_config[attr.config_name] = default_reuse_factor else: if attr.default is not None: layer_config[attr.config_name] = attr.default From cc7652de36847360b54c99c6fb9cad3665760943 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 Sep 2024 11:15:53 -0500 Subject: [PATCH 42/62] Pre-commit fix --- hls4ml/converters/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index 40515afea6..c5ff82703c 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -11,6 +11,7 @@ from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 + # from hls4ml.converters.pytorch_to_hls import parse_pytorch_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config From b36fe4ff2eadd2c023550dec580a64e0dbb2b5ef Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 Sep 2024 19:19:48 -0500 Subject: [PATCH 43/62] fix qonnx review suggestions --- hls4ml/converters/__init__.py | 2 -- hls4ml/converters/onnx/core.py | 14 ------------- .../model/optimizer/passes/batchnorm_opt.py | 21 ++++++++----------- hls4ml/model/optimizer/passes/bn_fuse.py | 11 ++++------ 4 files changed, 13 insertions(+), 35 deletions(-) diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index c5ff82703c..13e90df687 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -11,8 +11,6 @@ from hls4ml.converters.keras_to_hls import parse_keras_model # noqa: F401 from hls4ml.converters.keras_to_hls import keras_to_hls, register_keras_layer_handler from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 - -# from hls4ml.converters.pytorch_to_hls import parse_pytorch_model # noqa: F401 from hls4ml.model import ModelGraph from hls4ml.utils.config import create_config from hls4ml.utils.symbolic_utils import LUTFunction diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index c6aaa6009c..d84ba98a95 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -29,7 +29,6 @@ def parse_matmul_layer(node, input_names, input_shapes, graph): 'Softmax', 'Softsign', 'Softplus', - # 
'Clip', ] activation_map = { @@ -45,7 +44,6 @@ def parse_matmul_layer(node, input_names, input_shapes, graph): 'Softmax': 'Softmax', 'Softsign': 'Activation', 'Softplus': 'Activation', - # 'Clip': 'Clip', } # --------- @@ -69,18 +67,6 @@ def parse_activation_layer(node, input_names, input_shapes, graph): layer['activation'] = layer['class_name'] layer['activ_param'] = get_onnx_attribute(node, 'alpha', 0.01) - # # Don't yet support Clip - # elif layer['class_name'] == 'Clip': - # clip_min_node = [x for x in graph.initializer if x.name in input_names] - # clip_min = clip_min_node[0].float_data[0] - - # # Check if it's relu or not - # if clip_min == 0.0: - # layer['class_name'] = 'Activation' - # layer['activation'] = 'ReLU' - # else: - # raise Exception('Clip with min != 0 is not supported yet!') - else: layer['activation'] = layer['class_name'] layer['class_name'] = 'Activation' diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 94a9a32d70..50bbf96e04 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -30,7 +30,7 @@ def transform(self, model, node): gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): - raise TypeError('Only consant gammas supported') + raise TypeError('Only constant gammas supported') gamma = gamma_node.attributes['value'] attributes['gamma_data'] = gamma attributes['gamma_quantizer'] = gamma_node.get_attr('quantizer') @@ -40,7 +40,7 @@ def transform(self, model, node): beta_node = node.get_input_node(node.inputs[2]) if not isinstance(beta_node, Constant): - raise TypeError('Only consant betas supported') + raise TypeError('Only constant betas supported') beta = beta_node.attributes['value'] attributes['beta_data'] = beta attributes['beta_quantizer'] = beta_node.get_attr('quantizer') @@ -49,7 +49,7 @@ def transform(self, model, node): moving_mean_node = 
node.get_input_node(node.inputs[3]) if not isinstance(moving_mean_node, Constant): - raise TypeError('Only consant moving_means supported') + raise TypeError('Only constant moving_means supported') moving_mean = moving_mean_node.attributes['value'] attributes['mean_data'] = moving_mean attributes['mean_quantizer'] = moving_mean_node.get_attr('quantizer') @@ -58,7 +58,7 @@ def transform(self, model, node): moving_variance_node = node.get_input_node(node.inputs[4]) if not isinstance(moving_variance_node, Constant): - raise TypeError('Only consant moving_variances supported') + raise TypeError('Only constant moving_variances supported') moving_variance = moving_variance_node.attributes['value'] attributes['variance_data'] = moving_variance attributes['variance_quantizer'] = moving_variance_node.get_attr('quantizer') @@ -147,12 +147,14 @@ def transform(self, model, node): class FuseConsecutiveBatchNormalization(OptimizerPass): """ - OptimizerPass to merge consecutive BatchNormalization layers, - only if the earlier one does not have quantization specified + OptimizerPass to merge consecutive BatchNormalization layers, only if the earlier one does not have the output type + specified. There is a further check on the compatibility to merge: except in cases when merging a scale of 1 or a + bias of 0, this does not merge when both scales or both biases are quantized. Note: Consider restricting this to ApplyAlpha. Batch Normalization-style quantization seems to be ignored. - Note: This optimizer may not be safe if weights are updateable. May need to turn off. + Note: This optimizer may not be safe if weights are updateable, in particular if a scale can go from ones to other + values or if a bias can go from zeros to other values. 
""" def match(self, node): @@ -190,11 +192,6 @@ def transform(self, model, node): if len(prev_map[prev_node.outputs[0]]) > 1: return False - # # Not sure why this part is needed - # node_map = node.get_output_use_map() - # if len(node_map[node.outputs[0]]) > 1: - # return False - s0 = prev_node.weights['scale'].data_unquantized b0 = prev_node.weights['bias'].data_unquantized s1 = node.weights['scale'].data_unquantized diff --git a/hls4ml/model/optimizer/passes/bn_fuse.py b/hls4ml/model/optimizer/passes/bn_fuse.py index b3e8e454c8..000d8380ce 100644 --- a/hls4ml/model/optimizer/passes/bn_fuse.py +++ b/hls4ml/model/optimizer/passes/bn_fuse.py @@ -7,8 +7,10 @@ class FuseBatchNormalization(OptimizerPass): """ - OptimizerPass to merge BatchNormalization layers, - only if the earlier one does not have quantization specified + OptimizerPass to merge a BatchNormalization layer with Dense or Conv layer, only if the Dense or Conv layer does not + have the output type specified. There is a further check on the compatibility to merge: except in cases when merging a + weight/scale of 1 or a bias of 0, this optimizer does not merge nodes when both the weight and scale or both biases + are quantized. Note: Consider restricting this to ApplyAlpha. Batch Normalization quantization seems to be ignored. 
@@ -49,11 +51,6 @@ def transform(self, model, node): if len(parent_map[parent_node.outputs[0]]) > 1: return False - # # Not sure why this part is needed - # node_map = node.get_output_use_map() - # if len(node_map[node.outputs[0]]) > 1: - # return False - parent_weight = parent_node.weights['weight'] parent_bias = parent_node.weights['bias'] From c37d953181f64396d079c5d4b5f51dabceae8e2e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 3 Sep 2024 19:22:44 -0500 Subject: [PATCH 44/62] fix qonnx review suggestions (part 2) --- hls4ml/model/optimizer/passes/merge_const.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/merge_const.py b/hls4ml/model/optimizer/passes/merge_const.py index 78591d203c..a75ed27aca 100644 --- a/hls4ml/model/optimizer/passes/merge_const.py +++ b/hls4ml/model/optimizer/passes/merge_const.py @@ -67,7 +67,7 @@ def transform(self, model, node): class MergeToApplyAlpha(OptimizerPass): - """Convert Add, Sub, Mul, or Div Merges with consant to ApplyAlpha""" + """Convert Add, Sub, Mul, or Div Merges with constant to ApplyAlpha""" def match(self, node): is_match = ( @@ -178,7 +178,7 @@ def transform(self, model, node): class MergeToApplyAlphaDiv(OptimizerPass): """ - Convert Div Merges with consant to ApplyAlpha + Convert Div Merges with constant to ApplyAlpha TODO: propagate precision """ From 23825ded13fb418516af29fa6a4768c97bc98ba8 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 4 Sep 2024 12:10:19 -0500 Subject: [PATCH 45/62] fix error message --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 50bbf96e04..cd238092c8 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -24,7 +24,7 @@ def transform(self, model, node): """ if not (len(node.inputs) == 5 
and all(node.inputs)): - raise ValueError(f'All {len.node.inputs} BatchNormOnnnx inputs need to be defined') + raise ValueError('All 5 BatchNormOnnnx inputs need to be defined') attributes = {k: node.attributes.get(k, None) for k in _base_attributes} From cad06fa9361810fb006061a65fbea2b49feee50b Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 9 Sep 2024 14:50:21 -0500 Subject: [PATCH 46/62] change order of qonnx optimizers --- hls4ml/model/optimizer/__init__.py | 2 +- hls4ml/model/optimizer/passes/batchnorm_opt.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 64be9903ad..fee180b0c5 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -37,8 +37,8 @@ 'quant_constant_parameters', 'quant_to_activation', 'fuse_quant_with_constant', - 'quant_to_alpha_activation_alpha', 'const_quant_to_const_alpha', + 'quant_to_alpha_activation_alpha', 'batch_norm_onnx_constant_parameters', 'constant_batch_norm_fusion', 'merge_two_constants', diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index cd238092c8..0dde6b77a9 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from hls4ml.model.layers import BatchNormalization, BatchNormOnnx, Constant @@ -94,11 +96,14 @@ def transform(self, model, node): """ Remove the batch norm """ + warnings.warn('ConstantBatchNormFusion should probably not be triggered. 
Check the optimizer order.', stacklevel=2) const_node = node.get_input_node(node.inputs[0]) const_prec = const_node.get_output_variable().type.precision - new_val = const_node.value * node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized + new_val = ( + const_node.attributes['value'] * node.weights['scale'].data_unquantized + node.weights['bias'].data_unquantized + ) const_node.set_attr('value', new_val) const_node.set_attr('quantizer', node.get_attr('quantizer')) # None if not defined From 51c80f96e4c6caf0f3c5310a6b334ddd008e9c1d Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 12 Sep 2024 12:16:43 -0500 Subject: [PATCH 47/62] make the optimizer order be more similar to main branch --- hls4ml/model/optimizer/__init__.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index fee180b0c5..840d42ebf2 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -57,25 +57,24 @@ register_flow( 'convert', [ - 'fuse_consecutive_batch_normalization', + 'channels_last_converter', 'merge_linear_activation', - 'fuse_batch_normalization', - 'eliminate_linear_activation', - 'qkeras_factorize_alpha', - 'extract_ternary_threshold', - 'replace_multidimensional_dense_with_conv', 'seperable_to_depthwise_and_conv', - # The ones above here need to be before infer_precision_types - 'infer_precision_types', - 'channels_last_converter', 'remove_transpose_before_flatten', 'remove_nop_transpose', 'remove_single_channel_transpose', 'fuse_bias_add', 'expand_layer_group', 'output_rounding_saturation_mode', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization', + 'fuse_batch_normalization', + 'replace_multidimensional_dense_with_conv', 'enforce_proxy_model_embedded_config', + 'eliminate_linear_activation', + # many of the above optimizers need to be done before this +
'infer_precision_types', ], requires=['parse_qonnx'], ) # TODO Maybe not all QKeras optmizers belong here? @@ -83,10 +82,7 @@ register_flow( 'optimize', [ - 'eliminate_linear_activation', 'remove_nop_batch_normalization', - 'infer_precision_types', - 'set_precision_concat', ], requires=['convert'], ) From 8eaf10a1557fb56c8abd97966e86488357dde1b7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 19 Sep 2024 16:16:16 -0500 Subject: [PATCH 48/62] fix dimensions when moving scales --- hls4ml/model/layers.py | 2 +- hls4ml/model/optimizer/passes/move_scales.py | 71 +++++++++++--------- hls4ml/model/optimizer/passes/quant_opt.py | 22 +++--- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index e5ceaca28d..bc3bc2b1c3 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -976,7 +976,7 @@ def initialize(self): class BatchNormalization(Layer): _expected_attributes = [ Attribute('n_in'), - Attribute('n_filt', default=0), + Attribute('n_filt', default=-1), WeightAttribute('scale'), WeightAttribute('bias'), TypeAttribute('scale'), diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index cec69af5e8..3776a6d202 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -67,15 +67,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # check size compatibility - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # check size compatibility + newbias = np.zeros(output.shape) 
can_propagate = True except ValueError: can_propagate = False @@ -84,10 +85,9 @@ def transform(self, model, node): if not can_propagate and isinstance(inp[other_idx], Constant): # can handle nonzero bias in some cases if other value is a Constant try: - np.broadcast_to(scale, output.shape) # check size compatibility - newscale = scale - newbias = inp[other_idx].attributes['value'] * bias - np.broadcast_to(newbias, output.shape) + newscale = np.broadcast_to(scale, output.shape) # check size compatibility + newbias = np.broadcast_to(inp[other_idx].attributes['value'] * bias, output.shape) + new_attrs.pop('bias_precision', None) # remove special bias precision settings can_propagate = True except ValueError: can_propagate = False @@ -97,9 +97,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -136,9 +137,11 @@ def transform(self, model, node): model.remove_node(in0) model.remove_node(in1) - new_node = model.make_node('ApplyAlpha', in0.name, in0.attributes, [x for x in node.outputs]) - new_node.add_weights(scale) - new_node.add_bias(bias) + new_attrs = in0.attributes + new_attrs['scale_data'] = scale + new_attrs['bias_data'] = bias + + new_node = model.make_node('ApplyAlpha', in0.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -170,15 +173,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and 
bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # check broadcastable - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # check broadcastable + newbias = np.zeros(output.shape) can_propagate = True except ValueError: can_propagate = False @@ -188,9 +192,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -224,15 +229,16 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not bias.shape and bias == 0: # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - np.broadcast_to(scale, output.shape) # make sure broadcastable - newscale = scale - newbias = np.array(0) + newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + newbias = np.zeros(output.shape) can_propagate = True except ValueError: can_propagate = False @@ -242,9 +248,10 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) 
return True @@ -278,14 +285,15 @@ def transform(self, model, node): bias = np.array(bias1d[0]) output = node.get_output_variable() + # to remove warning, since these get set again + new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} can_propagate = False if not scale.shape and scale == 1: # No scale, just additional bias try: - np.broadcast_to(bias, output.shape) - newscale = np.array(1) - newbias = bias + newscale = np.ones(output.shape) + newbias = np.broadcast_to(bias, output.shape) can_propagate = True except ValueError: can_propagate = False @@ -295,8 +303,9 @@ def transform(self, model, node): model.remove_node(apply_alpha) - new_node = model.make_node('ApplyAlpha', apply_alpha.name, apply_alpha.attributes, [x for x in node.outputs]) - new_node.add_weights(newscale) - new_node.add_bias(newbias) + new_attrs['scale_data'] = newscale + new_attrs['bias_data'] = newbias + + new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index ed7f9701a2..69e9ca7685 100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -252,11 +252,13 @@ def transform(self, model, node): # but now add the ApplyAlhpas before and after + inshape = node.get_input_variable().shape + scale = node.get_attr('scale') bias = node.get_attr('zeropt') - attributes_scale = {} - attributes_rescale = {} + attributes_scale = {'n_filt': -1} + attributes_rescale = {'n_filt': -1} scale_config = copy.deepcopy(config) scale_name = f'{node.name}_scale' @@ -270,16 +272,16 @@ def transform(self, model, node): firstscale = 1 / scale firstbias = bias - attributes_scale['scale_data'] = firstscale - attributes_scale['bias_data'] = firstbias + attributes_scale['scale_data'] = np.broadcast_to(firstscale, inshape) + 
attributes_scale['bias_data'] = np.broadcast_to(firstbias, inshape) scale_node = model.make_node(ApplyAlpha, scale_name, attributes_scale, [node.inputs[0]]) model.insert_node(scale_node) rescale = scale rebias = -bias * scale - attributes_rescale['scale_data'] = rescale - attributes_rescale['bias_data'] = rebias + attributes_rescale['scale_data'] = np.broadcast_to(rescale, inshape) + attributes_rescale['bias_data'] = np.broadcast_to(rebias, inshape) rescale_node = model.make_node(ApplyAlpha, rescale_name, attributes_rescale, [new_node.outputs[0]]) model.insert_node(rescale_node) @@ -332,7 +334,9 @@ def transform(self, model, node): const_node.types['result_t'].precision = precision const_node.get_output_variable().type.precision = precision - attributes_rescale = {} + inshape = node.get_input_variable().shape + + attributes_rescale = {'n_filt': -1} rescale_config = copy.deepcopy(model.config.get_layer_config(node)) rescale_name = f'{node.name}_rescale' @@ -341,8 +345,8 @@ def transform(self, model, node): rescale = scale rebias = -bias * scale - attributes_rescale['scale_data'] = rescale - attributes_rescale['bias_data'] = rebias + attributes_rescale['scale_data'] = np.broadcast_to(rescale, inshape) + attributes_rescale['bias_data'] = np.broadcast_to(rebias, inshape) rescale_node = model.make_node( ApplyAlpha, rescale_name, attributes_rescale, [x for x in node.inputs], [x for x in node.outputs] From d80dc3b410d2a2578a79aae905530a92e7b732a1 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 19 Sep 2024 21:44:22 -0500 Subject: [PATCH 49/62] Added support and some missing parts for `Depthwise` and `Pointwise` Convolutions from QONNX --- hls4ml/converters/onnx/convolution.py | 11 ++- hls4ml/model/optimizer/__init__.py | 1 + .../model/optimizer/passes/conv_to_convxd.py | 16 ++-- .../passes/conv_to_depthwiseconvxd.py | 94 +++++++++++++++++++ 4 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 
hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py diff --git a/hls4ml/converters/onnx/convolution.py b/hls4ml/converters/onnx/convolution.py index 85dc0ca804..d84fb855a8 100644 --- a/hls4ml/converters/onnx/convolution.py +++ b/hls4ml/converters/onnx/convolution.py @@ -21,13 +21,18 @@ def parse_conv_layer(node, input_names, input_shapes, graph): if dilations is None: dilations = [1] * len(layer['kernel_shape']) - if get_onnx_attribute(node, 'group') != 1: - raise ValueError("Only 1 group supported corrently") - layer['in_width'] = input_shapes[0][-2] layer['n_chan'] = input_shapes[0][-1] layer['n_filt'] = input_shapes[1][0] + layer['group'] = int(get_onnx_attribute(node, 'group')) + if layer['group'] != 1: + layer['depth_multiplier'] = get_onnx_attribute(node, 'group') / layer['n_chan'] + if not layer['depth_multiplier'].is_integer(): + raise ValueError('Depth multiplier must be an integer') + else: + layer['depth_multiplier'] = int(layer['depth_multiplier']) + layer['n_dim'] = len(input_shapes[0]) - 2 # 2 comes from channels and batch dimentions if layer['n_dim'] not in (1, 2): raise ValueError("Only 1D and 2D convolutions are supported") diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 840d42ebf2..10f652345f 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -51,6 +51,7 @@ 'merge_to_apply_alpha_div', 'matmul_const_to_dense', 'conv_to_conv_x_d', + 'conv_to_depthwise_conv_x_d', ], ) diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 6fb88ad0d0..25ac50ba40 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -29,12 +29,16 @@ class ConvToConvXD(OptimizerPass): """Convert Conv with constant to a Conv1D or Conv2D layer""" def match(self, node): - is_match = isinstance(node, Conv) and ( - (len(node.inputs) == 2 and 
isinstance(node.get_input_node(node.inputs[1]), Constant)) - or ( - len(node.inputs) == 3 - and isinstance(node.get_input_node(node.inputs[1]), Constant) - and isinstance(node.get_input_node(node.inputs[2]), Constant) + is_match = ( + isinstance(node, Conv) + and node.get_attr('group') == 1 + and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) ) ) diff --git a/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py new file mode 100644 index 0000000000..26603c6a64 --- /dev/null +++ b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py @@ -0,0 +1,94 @@ +import numpy as np + +from hls4ml.model.layers import Constant, Conv, DepthwiseConv1D, DepthwiseConv2D +from hls4ml.model.optimizer import OptimizerPass + +# these are attributes to copy +_base_attributes = ( + 'in_width', + 'out_width', + 'n_chan', + 'n_filt', + 'pad_left', + 'pad_right', + 'filt_width', + 'stride_width', + 'dilation_width', + 'in_height', + 'out_height', + 'pad_top', + 'pad_bottom', + 'filt_height', + 'stride_height', + 'dilation_height', + 'data_format', +) + + +class ConvToDepthwiseConvXD(OptimizerPass): + """Convert Conv with constant to a DepthwiseConv1D or DepthwiseConv2D layer""" + + def match(self, node): + is_match = ( + isinstance(node, Conv) + and node.get_attr('group') == node.get_attr('n_chan') + and (node.get_attr('group') != 1) + and ( + (len(node.inputs) == 2 and isinstance(node.get_input_node(node.inputs[1]), Constant)) + or ( + len(node.inputs) == 3 + and isinstance(node.get_input_node(node.inputs[1]), Constant) + and isinstance(node.get_input_node(node.inputs[2]), Constant) + ) + ) + ) + + return is_match + + def transform(self, model, node): + """Convert Conv with constant to a DepthwiseConv1D or DepthwiseConv2D 
layer""" + + weight_node = node.get_input_node(node.inputs[1]) + weight_data = weight_node.attributes['value'] + bias_node = None + if len(node.inputs) == 3: + bias_node = node.get_input_node(node.inputs[2]) + + # creating the attributes + attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + + # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) + if node.attributes['n_dim'] == 1: + newtype = DepthwiseConv1D + attributes['depthwise_data'] = np.transpose(weight_data, (1, 2, 0)) + else: + newtype = DepthwiseConv2D + attributes['depthwise_data'] = np.transpose(weight_data, (1, 2, 3, 0)) + attributes['depthwise_quantizer'] = weight_node.get_attr('quantizer') + + if bias_node: + attributes['bias_data'] = bias_node.attributes['value'] + attributes['bias_quantizer'] = bias_node.get_attr('quantizer') + attributes['use_bias'] = True + else: + attributes['bias_data'] = np.zeros(attributes['n_filt']) + attributes['use_bias'] = False + + # get the configuration name + config = model.config.get_layer_config(node) + new_name = f'{newtype.__name__}_{node.name}' + model.config.set_name_config(new_name, config) + model.config.parse_name_config(new_name, config) + + # making new node + new_node = model.make_node(newtype, new_name, attributes, [node.inputs[0]], [x for x in node.outputs]) + + # removing and replacing old nodes + if bias_node: + model.remove_node(bias_node, rewire=False) + del node.inputs[2] + model.remove_node(weight_node, rewire=False) + del node.inputs[1] + model.replace_node(node, new_node) + + return True From fae647d6df5e2256591eed362ebb22375e4f4efc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 23 Sep 2024 16:51:00 -0500 Subject: [PATCH 50/62] add separable conv to test --- example-models | 2 +- test/pytest/test_qonnx.py | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/example-models b/example-models index ff74f73dbc..a81e36eb16 160000 
--- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 +Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 5b7b9d95c9..e9ef37578f 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -14,6 +14,23 @@ import hls4ml test_root_path = Path(__file__).parent +example_model_path = (test_root_path / '../../example-models').resolve() + + +@pytest.fixture(scope='module') +def sep_conv_model(): + """ + Load separabale conv model + """ + dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") + assert os.path.isfile(dl_file) + out_file = str(test_root_path / "separable_conv_model_ch_last_clean.onnx") + + # cleanup + qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) + model = ModelWrapper(out_file) + + return model @pytest.fixture(scope='module') @@ -83,6 +100,33 @@ def jettagging_model(): return model +@pytest.mark.parametrize('backend', ['Vitis']) +def test_sep_conv(sep_conv_model, backend): + model = sep_conv_model + ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) + X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) + # X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + config = hls4ml.utils.config.config_from_onnx_model( + model, granularity='name', backend=backend, default_precision='fixed<16,6>' + ) + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / f'hls4mlprj_qonnx_sep_conv_{backend}'), + io_type='io_stream', + backend=backend, + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(np.ascontiguousarray(X)) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + print('test') + + @pytest.mark.parametrize('backend', ['Vivado', 
'Vitis', 'Quartus']) def test_tfc_2w2a(tfc_2w2a_model, backend): model = tfc_2w2a_model From 56c85a442e0aee27fae8fa457fa273e0ec111a95 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 24 Sep 2024 00:15:47 -0500 Subject: [PATCH 51/62] fix pointwise with naming, quant_opt --- hls4ml/backends/catapult/passes/pointwise.py | 8 ++++---- hls4ml/backends/quartus/passes/pointwise.py | 6 ++---- hls4ml/backends/vivado/passes/pointwise.py | 9 +++++---- hls4ml/model/optimizer/passes/quant_opt.py | 2 +- test/pytest/test_qonnx.py | 7 ++----- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/hls4ml/backends/catapult/passes/pointwise.py b/hls4ml/backends/catapult/passes/pointwise.py index 0141d7f108..fd464ef172 100755 --- a/hls4ml/backends/catapult/passes/pointwise.py +++ b/hls4ml/backends/catapult/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.catapult.passes.convolution_templates import ( Conv1DConfigTemplate, Conv1DFunctionTemplate, @@ -75,8 +73,10 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' - pw_node = model.make_node('PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy()) - pw_node.weights['bias'].data = node.weights['bias'].data + new_attrs = {k: v for k, v in node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} + pw_node = model.make_node( + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() + ) # Set strategy to ensure lowercase string is passed to the template if model.config.is_resource_strategy(pw_node): pw_node.set_attr('strategy', 'resource') diff --git a/hls4ml/backends/quartus/passes/pointwise.py b/hls4ml/backends/quartus/passes/pointwise.py index 0f7f6821ae..d65ab22569 100644 --- a/hls4ml/backends/quartus/passes/pointwise.py +++ b/hls4ml/backends/quartus/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.fpga.fpga_layers import 
PointwiseConv1D, PointwiseConv2D from hls4ml.backends.quartus.passes.convolution_templates import ( Conv1DConfigTemplate, @@ -81,10 +79,10 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' + new_attrs = {k: v for k, v in node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} pw_node = model.make_node( - 'PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy(), outputs=node.outputs.copy() + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() ) - pw_node.weights['bias'].data = node.weights['bias'].data model.replace_node(node, pw_node) return True diff --git a/hls4ml/backends/vivado/passes/pointwise.py b/hls4ml/backends/vivado/passes/pointwise.py index 85d2635cb8..34568b09f7 100644 --- a/hls4ml/backends/vivado/passes/pointwise.py +++ b/hls4ml/backends/vivado/passes/pointwise.py @@ -1,5 +1,3 @@ -from copy import copy - from hls4ml.backends.fpga.fpga_layers import PointwiseConv1D, PointwiseConv2D from hls4ml.backends.vivado.passes.convolution_templates import ( Conv1DConfigTemplate, @@ -75,8 +73,11 @@ def match(self, node): def transform(self, model, node): dim = node.__class__.__name__[-2:] # '1D' or '2D' - pw_node = model.make_node('PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy()) - pw_node.weights['bias'].data = node.weights['bias'].data + # to remove warning, since these get set again + new_attrs = {k: v for k, v in node.attributes.items() if k not in ('trace', 'precision', 'reuse_factor')} + pw_node = model.make_node( + 'PointwiseConv' + dim, node.name, new_attrs, node.inputs.copy(), outputs=node.outputs.copy() + ) # Set strategy to ensure lowercase string is passed to the template if model.config.is_resource_strategy(pw_node): pw_node.set_attr('strategy', 'resource') diff --git a/hls4ml/model/optimizer/passes/quant_opt.py b/hls4ml/model/optimizer/passes/quant_opt.py index 69e9ca7685..cac29b5040 
100644 --- a/hls4ml/model/optimizer/passes/quant_opt.py +++ b/hls4ml/model/optimizer/passes/quant_opt.py @@ -187,7 +187,7 @@ def transform(self, model, node): integer = bitwidth scale = node.get_attr('scale') if _ALSO_MATCH_PO2 and not (scale == np.ones_like(scale)).all(): - _, exp = np.frexp(np.squeeze(scale)) + _, exp = np.frexp(scale[0]) # know that np.all(scale[0] == scale) must be true integer = bitwidth + exp - 1 precision, quantizer = _calculate_precision_quantizer(bitwidth, integer, signed, narrow, rounding_mode) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index e9ef37578f..58d8b68fe2 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -20,15 +20,12 @@ @pytest.fixture(scope='module') def sep_conv_model(): """ - Load separabale conv model + Load separabale conv model, already channels-last and cleaned """ dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") assert os.path.isfile(dl_file) - out_file = str(test_root_path / "separable_conv_model_ch_last_clean.onnx") - # cleanup - qonnx.util.cleanup.cleanup(dl_file, out_file=out_file) - model = ModelWrapper(out_file) + model = ModelWrapper(dl_file) return model From b0efdd6275a02eb9c18b82c29f90f30f380ac693 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Tue, 24 Sep 2024 10:45:59 -0500 Subject: [PATCH 52/62] fix ConstantBatchNormFusion --- hls4ml/model/optimizer/passes/batchnorm_opt.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index 0dde6b77a9..b6c21c7267 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -89,6 +89,9 @@ def match(self, node): isinstance(node, BatchNormalization) and not any(node.inputs[1:]) and isinstance(node.get_input_node(node.inputs[0]), Constant) + and isinstance( + node.get_input_node(node.inputs[0]).get_output_variable().type.precision, 
UnspecifiedPrecisionType + ) ) return is_match From 14da6f5d2be0feb6a65b1c0c626631a19b70041e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 25 Sep 2024 09:13:01 -0500 Subject: [PATCH 53/62] update broadcasting for moving scales for conv --- hls4ml/model/optimizer/passes/move_scales.py | 27 ++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 3776a6d202..1197480eaa 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -237,9 +237,21 @@ def transform(self, model, node): # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + if scale.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable + can_propagate = True newbias = np.zeros(output.shape) - can_propagate = True except ValueError: can_propagate = False @@ -309,3 +321,14 @@ def transform(self, model, node): new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True + + +def _remove_redundant_dims(X): + """This is somewhat of the inverse of broadcast-to. 
It sets the dimension size to 1 if all values are identical""" + + shape = X.shape + for i in range(len(shape)): + reduced = np.expand_dims(np.take(X, 0, axis=i), axis=i) + if np.all(reduced == X): + X = reduced + return X From 0333d36894e4378081639c76f0c0d7ac0f9d3d52 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 09:18:38 -0500 Subject: [PATCH 54/62] snapshot of current development --- hls4ml/model/optimizer/__init__.py | 1 + hls4ml/model/optimizer/passes/move_scales.py | 274 +++++++++++-------- 2 files changed, 168 insertions(+), 107 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 10f652345f..d82d45668d 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -43,6 +43,7 @@ 'constant_batch_norm_fusion', 'merge_two_constants', 'scale_down_add', + 'bias_down_add', 'scale_down_mat_mul', 'scale_down_weight_conv', 'scale_down_bias_conv', diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 1197480eaa..ecf1099ee5 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -56,15 +56,7 @@ def transform(self, model, node): scale = apply_alpha.weights['scale'].data_unquantized bias = apply_alpha.weights['bias'].data_unquantized - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + scale, bias = _make_scalar(scale, bias) output = node.get_output_variable() # to remove warning, since these get set again @@ -146,73 +138,54 @@ def transform(self, model, node): return True -class ScaleDownConv(OptimizerPass): - '''Shift an ApplyAlpha on input below a Conv''' +class BiasDownAdd(OptimizerPass): + '''Shift a ApplyAlpha with only bias below a Merge (Add)''' def match(self, node): - '''Shift an 
ApplyAlpha from the Weight''' - is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - + '''Match if there is only one ApplyAlpha. If there are two, if the scale of both is 0, they would + match the ScaleDownAdd, so this optimizer does not need to handle that case. + ''' + is_match = isinstance(node, Merge) and len(node.inputs) == 2 and node.attributes["op"] == "add" + if is_match: + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + is_match = ( + (isinstance(in0, ApplyAlpha) + or isinstance(in1, ApplyAlpha)) + and not (isinstance(in0, ApplyAlpha) + and isinstance(in1, ApplyAlpha)) + ) # only one ApplyAlpha return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[0]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) - output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} + alpha_node = in0 if isinstance(in0, ApplyAlpha) else in1 - can_propagate = False - if not bias.shape and bias == 0: - # zero bias, propagate through, if possible - # (always possible if scale is scalar) - try: - newscale = np.broadcast_to(scale, output.shape) # check broadcastable - newbias = np.zeros(output.shape) - can_propagate = True - except ValueError: - can_propagate = False + # Check if we can move + scale = alpha_node.weights['scale'].data_unquantized - if not can_propagate: + if (scale == 0).all(): + model.remove_node(alpha_node) + new_node = 
model.make_node('ApplyAlpha', alpha_node.name, alpha_node.attributes, [x for x in node.outputs]) + model.insert_node(new_node) + return True + else: return False - model.remove_node(apply_alpha) - - new_attrs['scale_data'] = newscale - new_attrs['bias_data'] = newbias - - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) - model.insert_node(new_node) - return True - -class ScaleDownWeightConv(OptimizerPass): - '''Shift an ApplyAlpha weight (from conv side) below a Conv''' +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on input below a Conv''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' - is_match = ( - isinstance(node, Conv) and len(node.inputs) > 1 and isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) - ) + is_match = isinstance(node, Conv) and isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[1]) + apply_alpha = node.get_input_node(node.inputs[0]) # Check if we can move scale = apply_alpha.weights['scale'].data_unquantized @@ -237,21 +210,9 @@ def transform(self, model, node): # zero bias, propagate through, if possible # (always possible if scale is scalar) try: - if scale.ndim > 1: - # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale) - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable - can_propagate = True - else: - newscale = np.broadcast_to(scale, output.shape) # make sure broadcastable - can_propagate = True + newscale = np.broadcast_to(scale, output.shape) # check broadcastable newbias = np.zeros(output.shape) + can_propagate = True except ValueError: can_propagate = False @@ -268,57 
+229,141 @@ def transform(self, model, node): return True -class ScaleDownBiasConv(OptimizerPass): - '''Shift an ApplyAlpha bias (from conv side) below a Conv''' +class ScaleDownConv(OptimizerPass): + '''Shift an ApplyAlpha on a Conv with 2-3 inputs''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' is_match = ( - isinstance(node, Conv) and len(node.inputs) > 2 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha) + isinstance(node, Conv) and len(node.inputs) > 1 and + (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + or (len(node.inputs) == 3 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha))) ) - return is_match def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[2]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) + in0 = node.get_input_node(node.inputs[0]) + in1 = node.get_input_node(node.inputs[1]) + in2 = node.get_input_node(node.inputs[2]) if len(node.inputs) == 3 else None + + aa0 = isinstance(in0, ApplyAlpha) + aa1 = isinstance(in1, ApplyAlpha) + aa2 = isinstance(in2, ApplyAlpha) if len(node.inputs) == 3 else False + + if not isinstance(in1, (Constant, ApplyAlpha)): + raise RuntimeError("The weight node needs to be ApplyAlpha or Constant") + if len(node.inputs) == 3 and not isinstance(in2, (Constant, ApplyAlpha)): + raise RuntimeError("The bias node needs to be ApplyAlpha or Constant") + + scale0 = in0.weights['scale'].data_unquantized if aa0 else None + bias0 = in0.weights['bias'].data_unquantized if aa0 else None + scale1 = in1.weights['scale'].data_unquantized if aa1 else None + bias1 = 
in1.weights['bias'].data_unquantized if aa1 else None + scale2 = in2.weights['scale'].data_unquantized if aa2 else None + bias2 = in2.weights['bias'].data_unquantized if aa2 else None + + # If possible, make scale and bias have scalar values + if aa0: + scale0, bias0 = _make_scalar(scale0, bias0) + if aa1: + scale1, bias1 = _make_scalar(scale1, bias1) + if aa2: + scale2, bias2 = _make_scalar(scale2, bias2) output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} - - can_propagate = False - if not scale.shape and scale == 1: - # No scale, just additional bias - try: - newscale = np.ones(output.shape) - newbias = np.broadcast_to(bias, output.shape) - can_propagate = True - except ValueError: - can_propagate = False + if (aa0 and not aa1 and not aa2): + # only datapath has a scale + bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not bias0.shape and bias0 == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + if conv_nobias: + try: + newscale = np.broadcast_to(_remove_redundant_dims(scale0), output.shape) # check broadcastable + newbias = np.zeros(output.shape) + can_propagate = True + except ValueError: + can_propagate = False + elif not scale0.shape: + # scalar scale0 + try: + newscale = np.broadcast_to(scale0, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) + elif (not aa0 and aa1 and not aa2): + # only weights have a scale + bias = in2.attributes['value'] if len(node.inputs) == 3 
else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not bias1.shape and bias1 == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale1) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + can_propagate = True + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in1) + + elif (not aa0 and not aa1 and aa2): + # only bias has a scale + + can_propagate = False + if not scale2.shape and scale2 == 1: + # No scale, just additional bias + try: + newscale = np.ones(output.shape) + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False - if not can_propagate: - return False + if not can_propagate: + return False - model.remove_node(apply_alpha) + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in2.attributes.items() if k not in ('trace', 'precision')} + new_name = in2.name + model.remove_node(in2) new_attrs['scale_data'] = newscale new_attrs['bias_data'] = newbias - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) + new_node = model.make_node('ApplyAlpha', new_name, new_attrs, [x for x in node.outputs]) model.insert_node(new_node) return True @@ -332,3 +377,18 @@ def 
_remove_redundant_dims(X): if np.all(reduced == X): X = reduced return X + + +def _make_scalar(scale, bias): + """Make the scale and bias scalar if possible""" + scale1d = np.ravel(scale) + if (scale1d[0] == scale).all(): + # scalar scale + scale = np.array(scale1d[0]) + + bias1d = np.ravel(bias) + if (bias1d[0] == bias).all(): + # scalar bias + bias = np.array(bias1d[0]) + + return scale, bias From 80184d21514ab617bf4950c0476aac34964616ab Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 12:09:47 -0500 Subject: [PATCH 55/62] snapshot working through scale downs --- example-models | 2 +- hls4ml/model/optimizer/passes/move_scales.py | 189 ++++++++++--------- 2 files changed, 103 insertions(+), 88 deletions(-) diff --git a/example-models b/example-models index a81e36eb16..ff74f73dbc 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 +Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index ecf1099ee5..0ccdf07c61 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -149,11 +149,8 @@ def match(self, node): if is_match: in0 = node.get_input_node(node.inputs[0]) in1 = node.get_input_node(node.inputs[1]) - is_match = ( - (isinstance(in0, ApplyAlpha) - or isinstance(in1, ApplyAlpha)) - and not (isinstance(in0, ApplyAlpha) - and isinstance(in1, ApplyAlpha)) + is_match = (isinstance(in0, ApplyAlpha) or isinstance(in1, ApplyAlpha)) and not ( + isinstance(in0, ApplyAlpha) and isinstance(in1, ApplyAlpha) ) # only one ApplyAlpha return is_match @@ -175,70 +172,19 @@ def transform(self, model, node): return False -class ScaleDownConv(OptimizerPass): - '''Shift an ApplyAlpha on input below a Conv''' - - def match(self, node): - '''Shift an ApplyAlpha from the Weight''' - is_match = isinstance(node, Conv) and 
isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - - return is_match - - def transform(self, model, node): - apply_alpha = node.get_input_node(node.inputs[0]) - - # Check if we can move - scale = apply_alpha.weights['scale'].data_unquantized - bias = apply_alpha.weights['bias'].data_unquantized - - scale1d = np.ravel(scale) - if (scale1d[0] == scale).all(): - # scalar scale - scale = np.array(scale1d[0]) - - bias1d = np.ravel(bias) - if (bias1d[0] == bias).all(): - # scalar bias - bias = np.array(bias1d[0]) - - output = node.get_output_variable() - # to remove warning, since these get set again - new_attrs = {k: v for k, v in apply_alpha.attributes.items() if k not in ('trace', 'precision')} - - can_propagate = False - if not bias.shape and bias == 0: - # zero bias, propagate through, if possible - # (always possible if scale is scalar) - try: - newscale = np.broadcast_to(scale, output.shape) # check broadcastable - newbias = np.zeros(output.shape) - can_propagate = True - except ValueError: - can_propagate = False - - if not can_propagate: - return False - - model.remove_node(apply_alpha) - - new_attrs['scale_data'] = newscale - new_attrs['bias_data'] = newbias - - new_node = model.make_node('ApplyAlpha', apply_alpha.name, new_attrs, [x for x in node.outputs]) - model.insert_node(new_node) - return True - - class ScaleDownConv(OptimizerPass): '''Shift an ApplyAlpha on a Conv with 2-3 inputs''' def match(self, node): '''Shift an ApplyAlpha from the Weight''' is_match = ( - isinstance(node, Conv) and len(node.inputs) > 1 and - (isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) - or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) - or (len(node.inputs) == 3 and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha))) + isinstance(node, Conv) + and len(node.inputs) > 1 + and ( + isinstance(node.get_input_node(node.inputs[0]), ApplyAlpha) + or isinstance(node.get_input_node(node.inputs[1]), ApplyAlpha) + or (len(node.inputs) == 3 
and isinstance(node.get_input_node(node.inputs[2]), ApplyAlpha)) + ) ) return is_match @@ -272,14 +218,14 @@ def transform(self, model, node): scale2, bias2 = _make_scalar(scale2, bias2) output = node.get_output_variable() - if (aa0 and not aa1 and not aa2): + if aa0 and not aa1 and not aa2: # only datapath has a scale bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) can_propagate = False if not bias0.shape and bias0 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # (always possible if scale is scalar) if conv_nobias: try: @@ -303,34 +249,58 @@ def transform(self, model, node): new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} new_name = in0.name model.remove_node(in0) - elif (not aa0 and aa1 and not aa2): + + elif not aa0 and aa1 and not aa2: # only weights have a scale bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) - + can_propagate = False if not bias1.shape and bias1 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # (always possible if scale is scalar) - try: - if scale1.ndim > 1: - # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale1) - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + if conv_nobias: + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 + reduced_scale1 = _remove_redundant_dims(scale1) + reduced_scale = reduced_scale0 @ reduced_scale1 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + 
scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + elif scale0.ndim > 1: + # scale1 is scalar + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) + reduced_scale = scale1 * reduced_scale0 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable can_propagate = True - else: - newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + elif not scale0.shape and not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) can_propagate = True - newbias = np.zeros(output.shape) - except ValueError: - can_propagate = False - + except ValueError: + can_propagate = False if not can_propagate: return False @@ -339,9 +309,9 @@ def transform(self, model, node): new_name = in1.name model.remove_node(in1) - elif (not aa0 and not aa1 and aa2): + elif not aa0 and not aa1 and aa2: # only bias has a scale - + can_propagate = False if not scale2.shape and scale2 == 1: # No scale, just additional bias @@ -360,6 +330,51 @@ def transform(self, model, node): new_name = in2.name model.remove_node(in2) + elif aa0 and aa1 and not aa2: + # dataflow and weights have an ApplyAlpha + bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 + conv_nobias = np.all(bias == 0) + + can_propagate = False + if not 
bias0.shape and bias0 == 0 and not bias1.shape and bias1 == 0: + # zero bias, propagate through, if possible + # (always possible if scale is scalar) + if conv_nobias: + try: + if scale1.ndim > 1: + # undo any broadcast_to + reduced_scale = _remove_redundant_dims(scale1) + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + else: + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + can_propagate = True + newbias = np.zeros(output.shape) + except ValueError: + can_propagate = False + elif not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale1), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in1) + + # after the big if-else above new_attrs['scale_data'] = newscale new_attrs['bias_data'] = newbias From 6bb08172a7f9dfeccc0ba6d6e72df21fbc0059d1 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:03:34 -0500 Subject: [PATCH 56/62] finish making the various cases --- hls4ml/model/optimizer/passes/move_scales.py | 131 +++++++++++++++---- 1 file changed, 104 insertions(+), 27 deletions(-) diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index 0ccdf07c61..b2470f2839 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -251,7 +251,7 @@ def transform(self, model, node): 
model.remove_node(in0) elif not aa0 and aa1 and not aa2: - # only weights have a scale + # only weights have an ApplyAlpha bias = in2.attributes['value'] if len(node.inputs) == 3 else 0 conv_nobias = np.all(bias == 0) @@ -263,22 +263,7 @@ def transform(self, model, node): try: if scale1.ndim > 1: # undo any broadcast_to - reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 - reduced_scale1 = _remove_redundant_dims(scale1) - reduced_scale = reduced_scale0 @ reduced_scale1 - if reduced_scale.shape[-1] == 1: - reduced_scale = reduced_scale[..., 0] - if node.attributes['n_dim'] == 1: - scale_trans = np.transpose(reduced_scale, (1, 0)) - else: - scale_trans = np.transpose(reduced_scale, (1, 2, 0)) - newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable - can_propagate = True - elif scale0.ndim > 1: - # scale1 is scalar - # undo any broadcast_to - reduced_scale0 = _remove_redundant_dims(scale0) - reduced_scale = scale1 * reduced_scale0 + reduced_scale = _remove_redundant_dims(scale1) if reduced_scale.shape[-1] == 1: reduced_scale = reduced_scale[..., 0] if node.attributes['n_dim'] == 1: @@ -288,16 +273,16 @@ def transform(self, model, node): newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable can_propagate = True else: - newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable + newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable can_propagate = True newbias = np.zeros(output.shape) except ValueError: can_propagate = False - elif not scale0.shape and not scale1.shape: + elif not scale1.shape: # scalar scale1 try: - newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable - newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) + newscale = np.broadcast_to(scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale1), output.shape) can_propagate = True except ValueError: 
can_propagate = False @@ -305,7 +290,7 @@ def transform(self, model, node): return False # to remove warning, since these get set again - new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} new_name = in1.name model.remove_node(in1) @@ -337,13 +322,28 @@ def transform(self, model, node): can_propagate = False if not bias0.shape and bias0 == 0 and not bias1.shape and bias1 == 0: - # zero bias, propagate through, if possible + # No zero offset, propagate through, if possible # (always possible if scale is scalar) if conv_nobias: try: if scale1.ndim > 1: # undo any broadcast_to - reduced_scale = _remove_redundant_dims(scale1) + reduced_scale0 = _remove_redundant_dims(scale0) if scale0.ndim > 1 else scale0 + reduced_scale1 = _remove_redundant_dims(scale1) + reduced_scale = reduced_scale0 @ reduced_scale1 + if reduced_scale.shape[-1] == 1: + reduced_scale = reduced_scale[..., 0] + if node.attributes['n_dim'] == 1: + scale_trans = np.transpose(reduced_scale, (1, 0)) + else: + scale_trans = np.transpose(reduced_scale, (1, 2, 0)) + newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable + can_propagate = True + elif scale0.ndim > 1: + # scale1 is scalar + # undo any broadcast_to + reduced_scale0 = _remove_redundant_dims(scale0) + reduced_scale = scale1 * reduced_scale0 if reduced_scale.shape[-1] == 1: reduced_scale = reduced_scale[..., 0] if node.attributes['n_dim'] == 1: @@ -353,16 +353,93 @@ def transform(self, model, node): newscale = np.broadcast_to(scale_trans, output.shape) # make sure broadcastable can_propagate = True else: - newscale = np.broadcast_to(scale1, output.shape) # make sure broadcastable + newscale = np.broadcast_to(scale0 * scale1, output.shape) # make sure broadcastable can_propagate = True newbias = np.zeros(output.shape) except ValueError: can_propagate = False - elif not scale1.shape: + elif not 
scale0.shape and not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias * (1 - scale0 * scale1), output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in1.name + model.remove_node(in0) + model.remove_node(in1) + + elif aa0 and not aa1 and aa2: + # datapath and bias have a scale + + can_propagate = False + if not bias0.shape and bias0 == 0 and not scale2.shape and not scale0.shape and scale2 == scale0: + # scalar scale0, no bais0 and scale2. + try: + newscale = np.broadcast_to(scale0, output.shape) # check broadcastable + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) + model.remove_node(in2) + + elif not aa0 and aa1 and aa2: + # only weights and bias have an ApplyAlpha + + can_propagate = False + if not bias1.shape and bias1 == 0 and not scale2.shape and not scale1.shape and scale2 == scale1: + # No zero offset, propagate through, if possible + # (always possible if scale is scalar) + if not scale1.shape: # scalar scale1 try: newscale = np.broadcast_to(scale1, output.shape) # check broadcastable - newbias = np.broadcast_to(bias * (1 - scale1), output.shape) + newbias = np.broadcast_to(bias2, output.shape) + can_propagate = True + except ValueError: + can_propagate = False + if not can_propagate: + return False + + # to remove warning, since these get set again + new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} + 
new_name = in1.name + model.remove_node(in1) + + elif aa0 and aa1 and aa2: + # have all + + can_propagate = False + if ( + not bias0.shape + and bias0 == 0 + and not bias1.shape + and bias1 == 0 + and not scale2.shape + and not scale1.shape + and not scale0.shape + and scale2 == scale1 * scale0 + ): + # No zero offset, propagate through, if possible + # (always possible if scale is scalar) + if not scale1.shape: + # scalar scale1 + try: + newscale = np.broadcast_to(scale0 * scale1, output.shape) # check broadcastable + newbias = np.broadcast_to(bias2, output.shape) can_propagate = True except ValueError: can_propagate = False From 766a14cf0775bfa52eb5d10a1a3cc27a4ab42d37 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:11:36 -0500 Subject: [PATCH 57/62] accidentally reverted the example models --- example-models | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example-models b/example-models index ff74f73dbc..a81e36eb16 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit ff74f73dbc253d1aa7de1603ee10ede551919548 +Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 From 5ff1373d3db86239b9912a96b1a040958643790f Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 26 Sep 2024 16:35:56 -0500 Subject: [PATCH 58/62] some bug fixes --- example-models | 2 +- hls4ml/model/optimizer/__init__.py | 2 -- hls4ml/model/optimizer/passes/move_scales.py | 7 +++++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/example-models b/example-models index a81e36eb16..3cfbcfd062 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit a81e36eb16593450d7661e7b9686666ddb397208 +Subproject commit 3cfbcfd062f60492507d21ff0e91559b3bdd6550 diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index d82d45668d..0edd549b29 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -45,8 +45,6 @@ 'scale_down_add', 
'bias_down_add', 'scale_down_mat_mul', - 'scale_down_weight_conv', - 'scale_down_bias_conv', 'scale_down_conv', 'merge_to_apply_alpha', 'merge_to_apply_alpha_div', diff --git a/hls4ml/model/optimizer/passes/move_scales.py b/hls4ml/model/optimizer/passes/move_scales.py index b2470f2839..43fcaa0da7 100644 --- a/hls4ml/model/optimizer/passes/move_scales.py +++ b/hls4ml/model/optimizer/passes/move_scales.py @@ -418,6 +418,7 @@ def transform(self, model, node): new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} new_name = in1.name model.remove_node(in1) + model.remove_node(in2) elif aa0 and aa1 and aa2: # have all @@ -447,9 +448,11 @@ def transform(self, model, node): return False # to remove warning, since these get set again - new_attrs = {k: v for k, v in in1.attributes.items() if k not in ('trace', 'precision')} - new_name = in1.name + new_attrs = {k: v for k, v in in0.attributes.items() if k not in ('trace', 'precision')} + new_name = in0.name + model.remove_node(in0) model.remove_node(in1) + model.remove_node(in2) # after the big if-else above new_attrs['scale_data'] = newscale From 86abdd236f74ce39af96a6f0fc868bc7246f49f2 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 29 Sep 2024 15:01:20 -0500 Subject: [PATCH 59/62] update qonnx sepconv test --- test/pytest/test_qonnx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 58d8b68fe2..75c6c95c3f 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -102,12 +102,12 @@ def test_sep_conv(sep_conv_model, backend): model = sep_conv_model ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) - # X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, 
idict)[model.graph.output[0].name] config = hls4ml.utils.config.config_from_onnx_model( - model, granularity='name', backend=backend, default_precision='fixed<16,6>' + model, granularity='name', backend=backend, default_precision='fixed<32,16>' ) hls_model = hls4ml.converters.convert_from_onnx_model( From 583a8c2be59bec87a3377a2567c4e8d7a75754fe Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 23 Oct 2024 00:47:44 -0500 Subject: [PATCH 60/62] In softmax, max axis -1 if it's a positive index that's identical --- hls4ml/converters/onnx/core.py | 3 + test/pytest/test_qonnx.py | 114 +++++++++++++++++++++++---------- 2 files changed, 83 insertions(+), 34 deletions(-) diff --git a/hls4ml/converters/onnx/core.py b/hls4ml/converters/onnx/core.py index d84ba98a95..8ad851426d 100644 --- a/hls4ml/converters/onnx/core.py +++ b/hls4ml/converters/onnx/core.py @@ -62,6 +62,9 @@ def parse_activation_layer(node, input_names, input_shapes, graph): if layer['class_name'] == 'Softmax': layer['activation'] = 'softmax' layer['axis'] = get_onnx_attribute(node, 'axis', -1) + # because -1 is better supported than an explicit index, check if it's the same + if layer['axis'] == len(input_shapes[0]) - 1: + layer['axis'] = -1 elif layer['class_name'] in ['ELU', 'LeakyReLU', 'ThresholdedReLU']: layer['activation'] = layer['class_name'] diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index 75c6c95c3f..c35b19f723 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -16,18 +16,7 @@ test_root_path = Path(__file__).parent example_model_path = (test_root_path / '../../example-models').resolve() - -@pytest.fixture(scope='module') -def sep_conv_model(): - """ - Load separabale conv model, already channels-last and cleaned - """ - dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") - assert os.path.isfile(dl_file) - - model = ModelWrapper(dl_file) - - return model +# The models @pytest.fixture(scope='module') @@ -97,31 +86,33 @@ 
def jettagging_model(): return model -@pytest.mark.parametrize('backend', ['Vitis']) -def test_sep_conv(sep_conv_model, backend): - model = sep_conv_model - ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) - X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) - X = (np.round(X * 2**16) * 2**-16).astype(np.float32) - idict = {model.graph.input[0].name: X} - y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] +@pytest.fixture(scope='module') +def sep_conv_model(): + """ + Load separabale conv model, already channels-last and cleaned + """ + dl_file = str(example_model_path / "onnx/separable_conv_model_ch_last.onnx") + assert os.path.isfile(dl_file) - config = hls4ml.utils.config.config_from_onnx_model( - model, granularity='name', backend=backend, default_precision='fixed<32,16>' - ) + model = ModelWrapper(dl_file) - hls_model = hls4ml.converters.convert_from_onnx_model( - model, - output_dir=str(test_root_path / f'hls4mlprj_qonnx_sep_conv_{backend}'), - io_type='io_stream', - backend=backend, - hls_config=config, - ) - hls_model.compile() - y_hls4ml = hls_model.predict(np.ascontiguousarray(X)) + return model + + +@pytest.fixture(scope='module') +def three_layer_keras_model(): + """ + Load a simple, originally keras unquantized model + """ + dl_file = str(example_model_path / "onnx/three_layer_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + return model - np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) - print('test') + +# The actual tests @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) @@ -197,3 +188,58 @@ def test_jet_tagging(jettagging_model, backend): y_hls4ml = hls_model.predict(X) np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +@pytest.mark.parametrize('backend', ['Vitis']) +def test_sep_conv(sep_conv_model, backend): + model = 
sep_conv_model + ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) + X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + config = hls4ml.utils.config.config_from_onnx_model( + model, granularity='name', backend=backend, default_precision='fixed<32,16>' + ) + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / f'hls4mlprj_qonnx_sep_conv_{backend}'), + io_type='io_stream', + backend=backend, + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(np.ascontiguousarray(X)) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) + + +@pytest.mark.parametrize('backend', ['Vitis']) +@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) +def test_three_layer_keras(three_layer_keras_model, io_type, backend): + model = three_layer_keras_model + ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) + X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) + X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + idict = {model.graph.input[0].name: X} + y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] + + config = hls4ml.utils.config.config_from_onnx_model( + model, granularity='name', backend=backend, default_precision='fixed<32,16>' + ) + + config['LayerName']['Softmax_0']['Implementation'] = 'legacy' + + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir=str(test_root_path / f'hls4mlprj_onnx_three_layer_keras_{io_type}_{backend}'), + io_type=io_type, + backend=backend, + hls_config=config, + ) + hls_model.compile() + y_hls4ml = hls_model.predict(X) + + np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) From 9cbf0f1c9f81417fe6be4fec7add6bf64690a010 Mon 
Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 23 Oct 2024 15:50:37 -0500 Subject: [PATCH 61/62] add more onnx tests, optimize the handling of some attributes, update example model version --- example-models | 2 +- .../model/optimizer/passes/batchnorm_opt.py | 2 +- .../model/optimizer/passes/conv_to_convxd.py | 2 +- .../passes/conv_to_depthwiseconvxd.py | 2 +- test/pytest/test_qonnx.py | 125 +++++++++++++++++- 5 files changed, 122 insertions(+), 11 deletions(-) diff --git a/example-models b/example-models index 3cfbcfd062..d40894b03f 160000 --- a/example-models +++ b/example-models @@ -1 +1 @@ -Subproject commit 3cfbcfd062f60492507d21ff0e91559b3bdd6550 +Subproject commit d40894b03f840a32da43a5adea0531ffc1db216e diff --git a/hls4ml/model/optimizer/passes/batchnorm_opt.py b/hls4ml/model/optimizer/passes/batchnorm_opt.py index b6c21c7267..e18d79ff4a 100644 --- a/hls4ml/model/optimizer/passes/batchnorm_opt.py +++ b/hls4ml/model/optimizer/passes/batchnorm_opt.py @@ -28,7 +28,7 @@ def transform(self, model, node): if not (len(node.inputs) == 5 and all(node.inputs)): raise ValueError('All 5 BatchNormOnnnx inputs need to be defined') - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes = {k: node.attributes[k] for k in _base_attributes if k in node.attributes} gamma_node = node.get_input_node(node.inputs[1]) if not isinstance(gamma_node, Constant): diff --git a/hls4ml/model/optimizer/passes/conv_to_convxd.py b/hls4ml/model/optimizer/passes/conv_to_convxd.py index 25ac50ba40..3e870e43a6 100644 --- a/hls4ml/model/optimizer/passes/conv_to_convxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_convxd.py @@ -54,7 +54,7 @@ def transform(self, model, node): bias_node = node.get_input_node(node.inputs[2]) # creating the attributes - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes = {k: node.attributes[k] for k in _base_attributes if k in node.attributes} # The ConvxD nodes expect the weight data to be 
in a different format, not (M, k1.., C) if node.attributes['n_dim'] == 1: diff --git a/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py index 26603c6a64..b1271b5784 100644 --- a/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py +++ b/hls4ml/model/optimizer/passes/conv_to_depthwiseconvxd.py @@ -55,7 +55,7 @@ def transform(self, model, node): bias_node = node.get_input_node(node.inputs[2]) # creating the attributes - attributes = {k: node.attributes.get(k, None) for k in _base_attributes} + attributes = {k: node.attributes[k] for k in _base_attributes if k in node.attributes} # The ConvxD nodes expect the weight data to be in a different format, not (M, k1.., C) if node.attributes['n_dim'] == 1: diff --git a/test/pytest/test_qonnx.py b/test/pytest/test_qonnx.py index c35b19f723..f822c591a7 100644 --- a/test/pytest/test_qonnx.py +++ b/test/pytest/test_qonnx.py @@ -10,6 +10,8 @@ # To conveniently run QONNX inference from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.channels_last import ConvertToChannelsLastAndClean +from qonnx.transformation.gemm_to_matmul import GemmToMatMul import hls4ml @@ -99,10 +101,23 @@ def sep_conv_model(): return model +@pytest.fixture(scope='module') +def two_layer_keras_model(): + """ + Load a simple, two-layer, originally keras, unquantized model + """ + dl_file = str(example_model_path / "onnx/two_layer_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + @pytest.fixture(scope='module') def three_layer_keras_model(): """ - Load a simple, originally keras unquantized model + Load a simple, three-layer, originally keras, unquantized model """ dl_file = str(example_model_path / "onnx/three_layer_keras.onnx") assert os.path.isfile(dl_file) @@ -112,6 +127,84 @@ def three_layer_keras_model(): return model +@pytest.fixture(scope='module') +def 
two_layer_pytorch_model(): + """ + Load a simple, two-layer, originally pytorch, unquantized model + """ + dl_file = str(example_model_path / "onnx/two_layer_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + +@pytest.fixture(scope='module') +def three_layer_pytorch_model(): + """ + Load a simple, three-layer, originally pytorch, unquantized model + """ + dl_file = str(example_model_path / "onnx/three_layer_pytorch.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + +@pytest.fixture(scope='module') +def conv1d_small_keras_model(): + """ + Load a simple conv1d, originally keras, unquantized model + """ + dl_file = str(example_model_path / "onnx/conv1d_small_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(ConvertToChannelsLastAndClean()) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + +@pytest.fixture(scope='module') +def conv2d_small_keras_model(): + """ + Load a simple conv2d, originally keras, unquantized model + """ + dl_file = str(example_model_path / "onnx/conv2d_small_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(ConvertToChannelsLastAndClean()) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + +@pytest.fixture(scope='module') +def conv2d_small_mp_keras_model(): + """ + Load a conv2d model with max pooling, originally keras, unquantized model + """ + dl_file 
= str(example_model_path / "onnx/conv2d_small_mp_keras.onnx") + assert os.path.isfile(dl_file) + + model = ModelWrapper(dl_file) + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(ConvertToChannelsLastAndClean()) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + return model + + # The actual tests @@ -216,25 +309,43 @@ def test_sep_conv(sep_conv_model, backend): np.testing.assert_allclose(y_qonnx.ravel(), y_hls4ml.ravel(), atol=1e-2, rtol=1) +@pytest.mark.parametrize( + 'model_name', + [ + 'two_layer_keras_model', + 'three_layer_keras_model', + 'two_layer_pytorch_model', + 'three_layer_pytorch_model', + 'conv1d_small_keras_model', + 'conv2d_small_keras_model', + 'conv2d_small_mp_keras_model', + ], +) @pytest.mark.parametrize('backend', ['Vitis']) @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) -def test_three_layer_keras(three_layer_keras_model, io_type, backend): - model = three_layer_keras_model +def test_simple_model(model_name, io_type, backend, request): + if model_name == 'conv2d_small_mp_keras_model' and io_type == 'io_stream': + # Not yet supported due to an issue with channels last conversion + # There is a qonnx PR. 
+ pytest.skip() + model = request.getfixturevalue(model_name) ishape = tuple(model.get_tensor_shape(model.graph.input[0].name)) X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape) - X = (np.round(X * 2**16) * 2**-16).astype(np.float32) + X = (np.round(X * 2**10) * 2**-10).astype(np.float32) idict = {model.graph.input[0].name: X} y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name] config = hls4ml.utils.config.config_from_onnx_model( - model, granularity='name', backend=backend, default_precision='fixed<32,16>' + model, granularity='name', backend=backend, default_precision='fixed<16,6>' ) - config['LayerName']['Softmax_0']['Implementation'] = 'legacy' + for layer in config['LayerName']: + if layer.startswith('Softmax'): + config['LayerName'][layer]['Implementation'] = 'legacy' hls_model = hls4ml.converters.convert_from_onnx_model( model, - output_dir=str(test_root_path / f'hls4mlprj_onnx_three_layer_keras_{io_type}_{backend}'), + output_dir=str(test_root_path / f'hls4mlprj_onnx_{model_name}_{io_type}_{backend}'), io_type=io_type, backend=backend, hls_config=config, From 3ec6c5a4dfb40dc57d25d4c34e96668f5a0b36b7 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 23 Oct 2024 19:51:31 -0500 Subject: [PATCH 62/62] update qonnx documentation --- docs/advanced/qonnx.rst | 56 +++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 2 files changed, 57 insertions(+) create mode 100644 docs/advanced/qonnx.rst diff --git a/docs/advanced/qonnx.rst b/docs/advanced/qonnx.rst new file mode 100644 index 0000000000..09b0074a0b --- /dev/null +++ b/docs/advanced/qonnx.rst @@ -0,0 +1,56 @@ +============== +ONNX and QONNX +============== + +Parsing of ONNX and QONNX models is made in conjunction with the `qonnx <https://github.com/fastmachinelearning/qonnx>`_ package, even if no quantization is used. This is a common initial parser shared with the AMD/Xilinx FINN project.
The first step is to do constant folding, shape inference, etc., on the ONNX graph, commonly known as `cleaning`. If a model has convolution layers, the model also needs to be converted to a channels-last format, since that is what hls4ml mainly supports. The ``qonnx`` package also provides a number of additional transforms that may need to be used. For example, ``Gemm`` nodes need to be converted to ``MatMul`` and ``Add`` nodes. + +There are command-line based versions of cleaning and channels-last conversion: + +.. code-block:: bash + + $ qonnx_clean filename.onnx + $ qonnx_to_channels_last filename_clean.onnx + $ qonnx_clean filename_clean_channels_last.onnx # good to do a clean again as a last step + +Things can similarly be done in Python. This method is usually easier if you additionally need to call other transforms. An example is given below which also calls the ``GemmToMatMul`` converter: + +.. code-block:: python + + model = ModelWrapper('filename.onnx') + model = qonnx.util.cleanup.cleanup_model(model) + model = model.transform(ConvertToChannelsLastAndClean()) + model = model.transform(GemmToMatMul()) + model = qonnx.util.cleanup.cleanup_model(model) + +``ModelWrapper`` is defined in ``qonnx.core.modelwrapper``. More information on the ``qonnx`` package can be found at the `QONNX documentation page <https://qonnx.readthedocs.io/en/latest/index.html>`_. + + +The next steps are very similar to those for a Keras model: + +.. code-block:: python + + config = hls4ml.utils.config.config_from_onnx_model( + model, granularity='name', backend='Vitis', default_precision='fixed<16,6>' + ) + # modify the config as desired + hls_model = hls4ml.converters.convert_from_onnx_model( + model, + output_dir='my-hls-test', + io_type='io_stream', + backend='Vitis', + hls_config=config, + ) + hls_model.compile() + +Note, unlike the Keras version, "name" granularity is the default for ``config_from_onnx_model``, and it must be used for QONNX models.
Unquantized ONNX models can use "model" if so desired, but generally there is no benefit. + +One can subsequently call the ``predict`` function to check the performance or build the project. + +Note that ``execute_onnx`` in ``qonnx.core.onnx_exec`` can be used to run the QONNX graphs directly, and it also provides the values at intermediate layers for validating the model (tracing). + +Quant nodes +=========== + +Documentation for quant nodes is provided in the `qonnx package <https://github.com/fastmachinelearning/qonnx/tree/main/docs/qonnx-custom-ops>`_. Note that currently hls4ml only supports the `Quant operator <https://github.com/fastmachinelearning/qonnx/tree/main/docs/qonnx-custom-ops/quant_op.md>`_. Also, not all legal ``Quant`` configurations are parsable by hls4ml or synthesizable. The ``scale``, ``zeropt``, and ``bitwidth`` values must be constant (though not necessarily scalar for the ``scale`` and ``zeropt``). + +Generally if the ``zeropt`` is 0 and the ``scale`` is a scalar power of 2, hls4ml uses ``ap_fixed`` or ``ac_fixed`` types (depending on the backend) to represent the quantizations. In other cases, the ``scale`` and ``zeropt`` need to be explicitly handled by hls4ml, and there is more of a chance of hls4ml not being able to process the input. (Please report any issues that you find.) diff --git a/docs/index.rst b/docs/index.rst index c21b90aebc..b2f7e2501b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,6 +22,7 @@ :hidden: :caption: Advanced Features + advanced/qonnx advanced/fifo_depth advanced/extension advanced/accelerator