From a57246ec7ef74f743fe11c5f924b3321af7467da Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 30 May 2024 16:29:03 -0500 Subject: [PATCH 01/19] make auto the default for layers --- hls4ml/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 7294dcf6fe..5c5daacf60 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -182,7 +182,7 @@ def make_layer_config(layer): if name.endswith('_t'): name = name[:-2] if attr.default is None: - precision_cfg[name] = default_precision + precision_cfg[name] = 'auto' else: precision_cfg[name] = str(attr.default) else: From 38f522feaf78fa76084cbf497f421201f99ac79e Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Mon, 3 Jun 2024 16:33:58 -0500 Subject: [PATCH 02/19] add max_precision, not currently used --- hls4ml/utils/config.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 5c5daacf60..6bda042619 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -112,7 +112,7 @@ def _get_precision_from_quantizer(quantizer): def config_from_keras_model( - model, granularity='model', backend=None, default_precision='fixed<16,6>', default_reuse_factor=1 + model, granularity='model', backend=None, default_precision='fixed<16,6>', default_reuse_factor=1, max_precision=None ): """Create an HLS conversion config given the Keras model. @@ -134,6 +134,8 @@ def config_from_keras_model( backend(str, optional): Name of the backend to use default_precision (str, optional): Default precision to use. Defaults to 'fixed<16,6>'. default_reuse_factor (int, optional): Default reuse factor. Defaults to 1. + max_precision (str or None, optional): Maximum width precision to use. Defaults to None, meaning no maximum. + Note: Only integer and fixed precisions are supported Raises: Exception: If Keras model has layers not supported by hls4ml. 
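For orientation, the two changes above (per-layer precisions defaulting to 'auto' at name granularity, and the new max_precision argument) are exercised from the Python API roughly as follows. This is an illustrative sketch, not part of the patch itself: the Keras model object and the backend choice are assumptions, and the 'fixed<24,16>' cap is borrowed from the updated test_cnn_mnist.py later in this series.

    import hls4ml

    # With name-level granularity, per-layer precisions now default to 'auto',
    # leaving the concrete fixed-point types to the precision-inference pass.
    config = hls4ml.utils.config_from_keras_model(
        keras_model,                      # assumed: an already-built/trained Keras model
        granularity='name',
        backend='Vitis',                  # passing the backend is recommended
        default_precision='fixed<16,6>',
        max_precision='fixed<24,16>',     # optional cap on widths chosen during inference
    )

    # The model-level precision entry becomes a small dict (see the next hunk):
    #   config['Model']['Precision'] == {'default': 'fixed<16,6>', 'maximum': 'fixed<24,16>'}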
@@ -238,7 +240,10 @@ def make_layer_config(layer): config = {} model_config = {} - model_config['Precision'] = default_precision + model_config['Precision'] = {} + model_config['Precision']['default'] = default_precision + if max_precision is not None: + model_config['Precision']['maximum'] = max_precision model_config['ReuseFactor'] = default_reuse_factor model_config['Strategy'] = 'Latency' model_config['BramFactor'] = 1_000_000_000 From 8510705674928710035dcf529134df3c90c39721 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 13 Jun 2024 22:17:19 -0500 Subject: [PATCH 03/19] add maximum precision in standard precision inference --- hls4ml/model/optimizer/passes/infer_precision.py | 13 +++++++++++++ test/pytest/test_cnn_mnist.py | 4 +++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 51422c534e..df533f2ce6 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -76,6 +76,13 @@ def _get_default_precision(self, node): model_config = node.model.config return model_config.backend.convert_precision_string(model_config.model_precision['default']) + def _get_maximum_precision(self, node): + model_config = node.model.config + if 'maximum' in model_config.model_precision: + return model_config.backend.convert_precision_string(model_config.model_precision['maximum']) + else: + return None + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -141,6 +148,12 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): bitwidth = integers + max(frac, bias_width - bias_integers) signed = signed or bias_signed + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + bitwidth = min(bitwidth, max_precision.width) + integers = min(integers, max_precision.integer) + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. new_type = FixedPrecisionType(bitwidth, integers, signed) diff --git a/test/pytest/test_cnn_mnist.py b/test/pytest/test_cnn_mnist.py index ab3365f228..562a0f08db 100644 --- a/test/pytest/test_cnn_mnist.py +++ b/test/pytest/test_cnn_mnist.py @@ -67,7 +67,9 @@ def keras_model(mnist_data): def test_mnist_cnn(keras_model, mnist_data, backend, io_type, strategy): x_train, y_train, x_test, y_test = mnist_data - hls_config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name', backend=backend) + hls_config = hls4ml.utils.config_from_keras_model( + keras_model, granularity='name', backend=backend, max_precision='fixed<24,16>' + ) hls_config['Model']['Strategy'] = strategy hls_config['LayerName']['softmax']['Implementation'] = 'stable' output_dir = str(test_root_path / f'hls4mlprj_cnn_mnist_{backend}_{io_type}_{strategy}') From 68796dd3cda336e35fa335f88da94bc92534aa08 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 2024 12:36:34 -0500 Subject: [PATCH 04/19] minimal handling of other types in infer_precision (e.g. 
for binary) --- .../model/optimizer/passes/infer_precision.py | 239 +++++++++++------- 1 file changed, 143 insertions(+), 96 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index df533f2ce6..c79a06ddfe 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -1,9 +1,10 @@ import math +from typing import Iterable import numpy as np from hls4ml.model.optimizer import ConfigurableOptimizerPass -from hls4ml.model.types import FixedPrecisionType, UnspecifiedPrecisionType +from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, PrecisionType, UnspecifiedPrecisionType # TODO: The code assumes everything is Fixed or Integer precision. Need to add checks @@ -83,6 +84,13 @@ def _get_maximum_precision(self, node): else: return None + def _all_supported_types(self, types: Iterable[PrecisionType]): + """Are all the types supported for inference--currently Integer or Fixed""" + for tp in types: + if not isinstance(tp, (IntegerPrecisionType, FixedPrecisionType)): + return False + return True + def _infer_default_type(self, node, type_name): model_config = node.model.config default_precision = model_config.backend.convert_precision_string(model_config.model_precision['default']) @@ -103,9 +111,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): inferred_types = [] input_precision = node.get_input_variable().type.precision - input_width = input_precision.width - input_integers = input_precision.integer - input_signed = input_precision.signed if 'weight_t' in types_to_infer: weight_quantizer = node.get_attr('weight_quantizer', None) @@ -117,10 +122,6 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['weight'].update_precision(node.types['weight_t'].precision) inferred_types.append('weight_t') - weight_width = node.types['weight_t'].precision.width - weight_integers = node.types['weight_t'].precision.integer - weight_signed = node.types['weight_t'].precision.signed - if 'bias_t' in types_to_infer: bias_quantizer = node.get_attr('bias_quantizer', None) if bias_quantizer is not None: @@ -131,31 +132,42 @@ def _infer_common_precision(self, node, types_to_infer, n_ops): node.weights['bias'].update_precision(node.types['bias_t'].precision) inferred_types.append('bias_t') - bias_width = node.types['bias_t'].precision.width - bias_integers = node.types['bias_t'].precision.integer - bias_signed = node.types['bias_t'].precision.signed - no_bias = node.weights['bias'].nonzeros == 0 and self.infer_no_bias # no bias + if self._all_supported_types((input_precision, node.types['weight_t'].precision, node.types['bias_t'].precision)): + input_width = input_precision.width + input_integers = input_precision.integer + input_signed = input_precision.signed - # using math.ceil instead of np.ceil because it returns an int - bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) - integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) - signed = weight_signed or input_signed + weight_width = node.types['weight_t'].precision.width + weight_integers = node.types['weight_t'].precision.integer + weight_signed = node.types['weight_t'].precision.signed - frac = bitwidth - integers + bias_width = node.types['bias_t'].precision.width + bias_integers = node.types['bias_t'].precision.integer + bias_signed = node.types['bias_t'].precision.signed + no_bias = node.weights['bias'].nonzeros == 0 and 
self.infer_no_bias # no bias - if not no_bias: - integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 - bitwidth = integers + max(frac, bias_width - bias_integers) - signed = signed or bias_signed + # using math.ceil instead of np.ceil because it returns an int + bitwidth = weight_width + input_width + math.ceil(np.log2(n_ops)) + integers = weight_integers + input_integers + math.ceil(np.log2(n_ops)) + signed = weight_signed or input_signed - # if max_precision is specified, limit the size to be less than max precisoin - max_precision = self._get_maximum_precision(node) - if max_precision is not None: - bitwidth = min(bitwidth, max_precision.width) - integers = min(integers, max_precision.integer) + frac = bitwidth - integers - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. - new_type = FixedPrecisionType(bitwidth, integers, signed) + if not no_bias: + integers = max(integers + (bias_signed and not signed), bias_integers + (signed and not bias_signed)) + 1 + bitwidth = integers + max(frac, bias_width - bias_integers) + signed = signed or bias_signed + + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + bitwidth = min(bitwidth, max_precision.width) + integers = min(integers, max_precision.integer) + + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. + new_type = FixedPrecisionType(bitwidth, integers, signed) + else: + new_type = self._get_default_precision(node) if 'accum_t' in types_to_infer: node.types['accum_t'].name = node.name + '_accum_t' @@ -278,24 +290,29 @@ def _infer_bn_precision(self, node, types_to_infer): scale_precision = node.types['scale_t'].precision bias_precision = node.types['bias_t'].precision - after_scale_signed = scale_precision.signed or input_precision.signed - after_scale_width = input_precision.width + scale_precision.width - after_scale_integer = input_precision.integer + scale_precision.integer + if self._all_supported_types((input_precision, scale_precision, bias_precision)): - out_precision_signed = after_scale_signed or bias_precision.signed - out_precision_integer = ( - max( - after_scale_integer + (bias_precision.signed and not after_scale_signed), - bias_precision.integer + (after_scale_signed and not bias_precision.signed), + after_scale_signed = scale_precision.signed or input_precision.signed + after_scale_width = input_precision.width + scale_precision.width + after_scale_integer = input_precision.integer + scale_precision.integer + + out_precision_signed = after_scale_signed or bias_precision.signed + out_precision_integer = ( + max( + after_scale_integer + (bias_precision.signed and not after_scale_signed), + bias_precision.integer + (after_scale_signed and not bias_precision.signed), + ) + + 1 + ) + out_precision_width = out_precision_integer + max( + after_scale_width - after_scale_integer, bias_precision.fractional ) - + 1 - ) - out_precision_width = out_precision_integer + max( - after_scale_width - after_scale_integer, bias_precision.fractional - ) - # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. - out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. 
+ out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) + + else: + out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -311,20 +328,28 @@ def _infer_pooling_precision(self, node, types_to_infer): input_precision = node.get_input_variable().type.precision pool_op = node.attributes['pool_op'].lower() - width = input_precision.width - integer = input_precision.integer - signed = input_precision.signed + if pool_op == 'max': + # This has the benefit of working for xnor types. I don't think "copy" is needed + accum_type = input_precision + + elif pool_op == 'average': + if self._all_supported_types((input_precision,)): + width = input_precision.width + integer = input_precision.integer + signed = input_precision.signed + + pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') + extra_bits = int(np.ceil(np.log2(pool_size))) + + accum_type = FixedPrecisionType( + width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed + ) + else: + accum_type = self._get_default_precision(node) - pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') - if pool_op == 'average': - extra_bits = int(np.ceil(np.log2(pool_size))) - elif pool_op == 'max': - extra_bits = 0 else: raise ValueError(f'Unknown pooling operation: {pool_op}') - accum_type = FixedPrecisionType(width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed) - node.types['accum_t'].name = node.name + '_accum_t' node.types['accum_t'].precision = accum_type @@ -344,36 +369,48 @@ def _infer_merge_precision(self, node, types_to_infer): op = node.get_attr('op').lower() if op in ('add', 'subtract', 'average'): - new_signed = input_1.signed or input_2.signed or op == 'subtract' - new_int = ( - max( - input_1.integer + (input_2.signed and not input_1.signed), - input_2.integer + (input_1.signed and not input_2.signed), + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed or op == 'subtract' + new_int = ( + max( + input_1.integer + (input_2.signed and not input_1.signed), + input_2.integer + (input_1.signed and not input_2.signed), + ) + + 1 ) - + 1 - ) - new_width = new_int + max(input_1.fractional, input_2.fractional) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + new_width = new_int + max(input_1.fractional, input_2.fractional) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op == 'multiply': - new_signed = input_1.signed or input_2.signed - new_int = input_1.integer + input_2.integer - new_width = input_1.width + input_2.width - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + if self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + new_int = input_1.integer + input_2.integer + new_width = input_1.width + input_2.width + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) elif op in ('maximum', 'minimum'): - new_signed = input_1.signed or input_2.signed - - input_1_integer = input_1.integer - input_2_integer = input_2.integer - - # add one to integer if unsigned while new is signed - if new_signed and not input_1.signed: - input_1_integer += 1 - if new_signed and not input_2.signed: - input_2_integer += 1 - - new_width = max(input_1.fractional, 
input_2.fractional) + max(input_1_integer, input_2_integer) - new_int = max(input_1_integer, input_2_integer) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer + + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 + + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) else: print(f'Warning: not propagating weights for type {op}') out_precision = self._get_default_precision(node) @@ -389,21 +426,28 @@ def _infer_cat_precision(self, node, types_to_infer): input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - new_signed = input_1.signed or input_2.signed + if input_1 == input_2: + # can handle binary and potentially others + out_precision = input_1 # I assume copy is not necessary + elif self._all_supported_types((input_1, input_2)): + new_signed = input_1.signed or input_2.signed + + input_1_integer = input_1.integer + input_2_integer = input_2.integer - input_1_integer = input_1.integer - input_2_integer = input_2.integer + # add one to integer if unsigned while new is signed + if new_signed and not input_1.signed: + input_1_integer += 1 + if new_signed and not input_2.signed: + input_2_integer += 1 - # add one to integer if unsigned while new is signed - if new_signed and not input_1.signed: - input_1_integer += 1 - if new_signed and not input_2.signed: - input_2_integer += 1 + new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) + new_int = max(input_1_integer, input_2_integer) - new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) - new_int = max(input_1_integer, input_2_integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + out_precision = self._get_default_precision(node) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision @@ -415,13 +459,16 @@ def _infer_dot_precision(self, node, types_to_infer): input_1 = node.get_input_variable(node.inputs[0]).type.precision input_2 = node.get_input_variable(node.inputs[1]).type.precision - n_in = node.get_input_variable(node.inputs[0]).shape[0] + if self._all_supported_types((input_1, input_2)): + n_in = node.get_input_variable(node.inputs[0]).shape[0] - new_signed = input_1.signed or input_2.signed - new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) - new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) + new_signed = input_1.signed or input_2.signed + new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) + new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) + else: + 
out_precision = self._get_default_precision(node) node.types['result_t'].name = node.name + '_result_t' node.types['result_t'].precision = out_precision From 807fbe539c18df71390f10a438649dce0c0cd808 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 2024 14:53:27 -0500 Subject: [PATCH 05/19] add more checks for max precision --- .../model/optimizer/passes/infer_precision.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index c79a06ddfe..9bd8f1fb64 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -191,6 +191,7 @@ def _infer_conv_precision(self, node, types_to_infer): n_ops = node.get_attr('n_chan') * node.get_attr('filt_height', 1) * node.get_attr('filt_width') return self._infer_common_precision(node, types_to_infer, n_ops) + # This function is ignored because we will split sepconv in the future def _infer_sepconv_precision(self, node, types_to_infer): inferred_types = [] @@ -308,6 +309,12 @@ def _infer_bn_precision(self, node, types_to_infer): after_scale_width - after_scale_integer, bias_precision.fractional ) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + out_precision_width = min(out_precision_width, max_precision.width) + out_precision_integer = min(out_precision_integer, max_precision.integer) + # Note: this is guaranteed to not overflow or need rounding, so it's sufficient to use the simpler form. out_precision = FixedPrecisionType(out_precision_width, out_precision_integer, out_precision_signed) @@ -341,6 +348,7 @@ def _infer_pooling_precision(self, node, types_to_infer): pool_size = node.get_attr('pool_height', 1) * node.get_attr('pool_width') extra_bits = int(np.ceil(np.log2(pool_size))) + # for now ignore max precision in this case accum_type = FixedPrecisionType( width=width + extra_bits * 2, integer=integer + extra_bits, signed=signed ) @@ -379,6 +387,10 @@ def _infer_merge_precision(self, node, types_to_infer): + 1 ) new_width = new_int + max(input_1.fractional, input_2.fractional) + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: out_precision = self._get_default_precision(node) @@ -387,6 +399,11 @@ def _infer_merge_precision(self, node, types_to_infer): new_signed = input_1.signed or input_2.signed new_int = input_1.integer + input_2.integer new_width = input_1.width + input_2.width + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: out_precision = self._get_default_precision(node) @@ -444,6 +461,12 @@ def _infer_cat_precision(self, node, types_to_infer): new_width = max(input_1.fractional, input_2.fractional) + max(input_1_integer, input_2_integer) new_int = max(input_1_integer, input_2_integer) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = 
min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: out_precision = self._get_default_precision(node) @@ -466,6 +489,12 @@ def _infer_dot_precision(self, node, types_to_infer): new_width = input_1.width + input_2.width + math.ceil(np.log2(n_in)) new_int = input_1.integer + input_2.integer + math.ceil(np.log2(n_in)) + # if max_precision is specified, limit the size to be less than max precisoin + max_precision = self._get_maximum_precision(node) + if max_precision is not None: + new_width = min(new_width, max_precision.width) + new_int = min(new_int, max_precision.integer) + out_precision = FixedPrecisionType(new_width, new_int, new_signed) else: out_precision = self._get_default_precision(node) From 92dc478c1dae456f4d709b840255d22c440d71cd Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 2024 14:55:54 -0500 Subject: [PATCH 06/19] fix the incorrect setting of reuse factors --- hls4ml/utils/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 6bda042619..d60478ef97 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -187,6 +187,8 @@ def make_layer_config(layer): precision_cfg[name] = 'auto' else: precision_cfg[name] = str(attr.default) + elif attr.name == 'reuse_factor': + layer_config[attr.config_name] = default_reuse_factor else: if attr.default is not None: layer_config[attr.config_name] = attr.default From b29705ecb7267f5f80e9f19f9b5cbbf6b6604c25 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 2024 16:39:59 -0500 Subject: [PATCH 07/19] update tests to pass backend to config_from_* --- test/pytest/test_batchnorm.py | 4 ++- test/pytest/test_batchnorm_pytorch.py | 4 ++- test/pytest/test_binary_cnn.py | 4 ++- test/pytest/test_causalpadding.py | 4 ++- test/pytest/test_clone_flatten.py | 4 +-- test/pytest/test_cnn_mnist.py | 2 +- test/pytest/test_cnn_mnist_qkeras.py | 2 +- test/pytest/test_embed.py | 4 ++- test/pytest/test_garnet.py | 4 +-- test/pytest/test_globalpooling.py | 8 +++-- test/pytest/test_keras_api.py | 6 ++-- test/pytest/test_pointwiseconv.py | 2 +- test/pytest/test_pooling.py | 8 +++-- test/pytest/test_qkeras.py | 42 +++++++++++++++++---------- test/pytest/test_rnn.py | 2 +- test/pytest/test_softmax.py | 4 +-- test/pytest/test_softsign.py | 2 +- test/pytest/test_trace.py | 2 +- test/pytest/test_transpose_concat.py | 2 +- test/pytest/test_upsampling.py | 4 ++- test/pytest/test_zeropadding.py | 4 ++- 21 files changed, 76 insertions(+), 42 deletions(-) diff --git a/test/pytest/test_batchnorm.py b/test/pytest/test_batchnorm.py index 727d2ee574..15774fa395 100644 --- a/test/pytest/test_batchnorm.py +++ b/test/pytest/test_batchnorm.py @@ -36,7 +36,9 @@ def test_batchnorm(model, data, backend, io_type): center = model.layers[0].center scale = model.layers[0].scale - config = hls4ml.utils.config_from_keras_model(model, default_precision=default_precision, granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision=default_precision, granularity='name', backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_batchnorm_{backend}_{io_type}_center{center}_scale{scale}') hls_model = hls4ml.converters.convert_from_keras_model( model, backend=backend, hls_config=config, io_type=io_type, output_dir=output_dir diff --git a/test/pytest/test_batchnorm_pytorch.py b/test/pytest/test_batchnorm_pytorch.py index 93cda2729c..0ffc6dc25e 100644 
--- a/test/pytest/test_batchnorm_pytorch.py +++ b/test/pytest/test_batchnorm_pytorch.py @@ -30,7 +30,9 @@ def test_batchnorm(data, backend, io_type): default_precision = 'ac_fixed<32, 1, true>' if backend == 'Quartus' else 'ac_fixed<32, 1>' - config = hls4ml.utils.config_from_pytorch_model(model, default_precision=default_precision, granularity='name') + config = hls4ml.utils.config_from_pytorch_model( + model, default_precision=default_precision, granularity='name', backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_batchnorm_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_pytorch_model( model, (None, in_shape), backend=backend, hls_config=config, io_type=io_type, output_dir=output_dir diff --git a/test/pytest/test_binary_cnn.py b/test/pytest/test_binary_cnn.py index 40af056df9..c1fa1b1551 100644 --- a/test/pytest/test_binary_cnn.py +++ b/test/pytest/test_binary_cnn.py @@ -66,7 +66,9 @@ def test_binary_cnn(backend, io_type, strategy): model2.summary() - hls_config = hls4ml.utils.config_from_keras_model(model2, granularity='name', default_precision='fixed<32,12>') + hls_config = hls4ml.utils.config_from_keras_model( + model2, granularity='name', default_precision='fixed<32,12>', backend=backend + ) hls_config['Model']['Strategy'] = strategy # hls_config['LayerName']['q_dense_7_softmax']['Implementation'] = 'legacy' diff --git a/test/pytest/test_causalpadding.py b/test/pytest/test_causalpadding.py index c076c99987..d91da35fac 100644 --- a/test/pytest/test_causalpadding.py +++ b/test/pytest/test_causalpadding.py @@ -23,7 +23,9 @@ def test_causalpadding(io_type, backend): data = np.expand_dims(data, axis=0) data = np.expand_dims(data, axis=-1) - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,16>', granularity='name', backend=backend + ) odir = str(test_root_path / f'hls4mlprj_validpadding_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, io_type=io_type, output_dir=odir, backend=backend diff --git a/test/pytest/test_clone_flatten.py b/test/pytest/test_clone_flatten.py index 5f631d027f..d819af54e7 100644 --- a/test/pytest/test_clone_flatten.py +++ b/test/pytest/test_clone_flatten.py @@ -31,9 +31,7 @@ def keras_model(): @pytest.mark.parametrize('backend', ['Vivado', 'Quartus', 'Catapult']) def hls_model(keras_model, backend, io_type): hls_config = hls4ml.utils.config_from_keras_model( - keras_model, - default_precision='ap_int<6>', - granularity='name', + keras_model, default_precision='ap_int<6>', granularity='name', backend=backend ) output_dir = str(test_root_path / f'hls4mlprj_clone_flatten_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( diff --git a/test/pytest/test_cnn_mnist.py b/test/pytest/test_cnn_mnist.py index 05f02da003..47ab27a665 100644 --- a/test/pytest/test_cnn_mnist.py +++ b/test/pytest/test_cnn_mnist.py @@ -68,7 +68,7 @@ def test_mnist_cnn(keras_model, mnist_data, backend, io_type, strategy): x_train, y_train, x_test, y_test = mnist_data hls_config = hls4ml.utils.config_from_keras_model( - keras_model, granularity='name', backend=backend, max_precision='fixed<24,16>' + keras_model, granularity='name', backend=backend, max_precision='fixed<24,16>', backend=backend ) hls_config['Model']['Strategy'] = strategy hls_config['LayerName']['average_pooling2d']['Precision']['accum'] = 'auto' diff --git 
a/test/pytest/test_cnn_mnist_qkeras.py b/test/pytest/test_cnn_mnist_qkeras.py index b4c28c70d1..38489b5865 100644 --- a/test/pytest/test_cnn_mnist_qkeras.py +++ b/test/pytest/test_cnn_mnist_qkeras.py @@ -58,7 +58,7 @@ def mnist_model(): ) def hls_model(mnist_model, backend, io_type, strategy): keras_model = mnist_model - hls_config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name') + hls_config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name', backend=backend) hls_config['Model']['Strategy'] = strategy hls_config['LayerName']['softmax']['Strategy'] = 'Stable' output_dir = str(test_root_path / f'hls4mlprj_cnn_mnist_qkeras_{backend}_{io_type}_{strategy}') diff --git a/test/pytest/test_embed.py b/test/pytest/test_embed.py index a27fc45b93..c045629a40 100644 --- a/test/pytest/test_embed.py +++ b/test/pytest/test_embed.py @@ -28,7 +28,9 @@ def keras_model(): @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus', 'Catapult']) @pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream']) def hls_model(keras_model, backend, io_type): - hls_config = hls4ml.utils.config_from_keras_model(keras_model, default_precision='ap_fixed<16,6>', granularity='name') + hls_config = hls4ml.utils.config_from_keras_model( + keras_model, default_precision='ap_fixed<16,6>', granularity='name', backend=backend + ) hls_config['LayerName']['embedding_input']['Precision']['result'] = 'ap_uint<4>' out_dir = str(test_root_path / 'hls4mlprj_embed_{}_{}').format(backend, io_type) hls_model = hls4ml.converters.convert_from_keras_model( diff --git a/test/pytest/test_garnet.py b/test/pytest/test_garnet.py index 67ddf77182..62bc82a8c0 100644 --- a/test/pytest/test_garnet.py +++ b/test/pytest/test_garnet.py @@ -33,7 +33,7 @@ def garnet_models(): model = Model(inputs=inputs, outputs=outputs) model.summary() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') config['Model'] = {} config['Model']['ReuseFactor'] = 1 config['Model']['Strategy'] = 'Latency' @@ -68,7 +68,7 @@ def garnet_stack_models(): model = Model(inputs=inputs, outputs=outputs) model.summary() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') config['Model'] = {} config['Model']['ReuseFactor'] = 1 config['Model']['Strategy'] = 'Latency' diff --git a/test/pytest/test_globalpooling.py b/test/pytest/test_globalpooling.py index b99f0d8212..d0b635595a 100644 --- a/test/pytest/test_globalpooling.py +++ b/test/pytest/test_globalpooling.py @@ -53,7 +53,9 @@ def keras_model_1d(request): def test_global_pool1d(backend, keras_model_1d, data_1d, io_type): model, model_type, keepdims = keras_model_1d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,9>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,9>', granularity='name', backend=backend + ) hls_model = hls4ml.converters.convert_from_keras_model( model, @@ -108,7 +110,9 @@ def keras_model_2d(request): def test_global_pool2d(backend, keras_model_2d, data_2d, io_type): model, model_type, keepdims = keras_model_2d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,9>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,9>', granularity='name', 
backend=backend + ) hls_model = hls4ml.converters.convert_from_keras_model( model, diff --git a/test/pytest/test_keras_api.py b/test/pytest/test_keras_api.py index b9f2d35f1a..6f00b2ec00 100644 --- a/test/pytest/test_keras_api.py +++ b/test/pytest/test_keras_api.py @@ -310,7 +310,9 @@ def test_depthwise2d(backend, io_type): model.add(DepthwiseConv2D(kernel_size=(3, 3), input_shape=(32, 32, 3))) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<32,12>') + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision='fixed<32,12>', backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_keras_api_depthwiseconv2d_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type @@ -336,7 +338,7 @@ def test_depthwise1d(backend, io_type): model.add(DepthwiseConv1D(kernel_size=3, input_shape=(32, 3))) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) output_dir = str(test_root_path / f'hls4mlprj_keras_api_depthwiseconv1d_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type diff --git a/test/pytest/test_pointwiseconv.py b/test/pytest/test_pointwiseconv.py index 060b9877de..d7f9281b38 100644 --- a/test/pytest/test_pointwiseconv.py +++ b/test/pytest/test_pointwiseconv.py @@ -154,7 +154,7 @@ def test_pointwise_config(strategy): model.compile(optimizer='adam', loss='mse') - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') config['Model']['Strategy'] = strategy config['LayerName']['conv2d_1x1']['Strategy'] = strategy # Will fail if the strategy is not lowercase output_dir = str(test_root_path / f'hls4mlprj_pointwise2d_config_{strategy}') diff --git a/test/pytest/test_pooling.py b/test/pytest/test_pooling.py index d7de80a5a7..7a10cd2733 100644 --- a/test/pytest/test_pooling.py +++ b/test/pytest/test_pooling.py @@ -53,7 +53,9 @@ def keras_model_1d(request): def test_pool1d(backend, keras_model_1d, data_1d, io_type): model, model_type, padding = keras_model_1d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,9>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,9>', granularity='name', backend=backend + ) hls_model = hls4ml.converters.convert_from_keras_model( model, @@ -108,7 +110,9 @@ def keras_model_2d(request): def test_pool2d(backend, keras_model_2d, data_2d, io_type): model, model_type, padding = keras_model_2d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,9>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,9>', granularity='name', backend=backend + ) hls_model = hls4ml.converters.convert_from_keras_model( model, diff --git a/test/pytest/test_qkeras.py b/test/pytest/test_qkeras.py index 45d015807b..d3a446ccca 100644 --- a/test/pytest/test_qkeras.py +++ b/test/pytest/test_qkeras.py @@ -77,7 +77,7 @@ def convert(load_jettagging_model, strategy): ''' model = load_jettagging_model - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + 
config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') config['Model']['Strategy'] = strategy config['LayerName']['softmax']['exp_table_t'] = 'ap_fixed<18,8>' config['LayerName']['softmax']['inv_table_t'] = 'ap_fixed<18,4>' @@ -156,7 +156,7 @@ def test_single_dense_activation_exact(randX_100_16, bits, alpha, backend, io_ty model.add(QActivation(activation=quantized_relu(bits, 0), name='relu1')) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) output_dir = str(test_root_path / f'hls4mlprj_qkeras_single_dense_activation_exact_{bits}_{alpha}_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type @@ -205,7 +205,7 @@ def test_quantizer_special(randX_1000_1, quantizer, backend, io_type): model.add(QActivation(input_shape=(1,), activation=quantizer, name='quantizer')) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) output_dir = str( test_root_path / f'hls4mlprj_qkeras_quantizer_{quantizer.__class__.__name__}_{quantizer.bits}_{backend}_{io_type}' ) @@ -289,7 +289,7 @@ def test_quantizer(randX_1000_1, quantizer, backend, io_type): model.add(QActivation(input_shape=(1,), activation=quantizer, name='quantizer')) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) output_dir = str( test_root_path / 'hls4mlprj_qkeras_quantizer_{}_{}_{}_{}_{}'.format( @@ -328,7 +328,7 @@ def test_relu_negative_slope(randX_1000_1, quantizer, backend, io_type): model.add(QActivation(input_shape=(1,), activation=quantizer, name='quantizer')) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) output_dir = str( test_root_path / 'hls4mlprj_qkeras_leaky_relu_{}_{}_neg_slope_{}_{}_{}'.format( @@ -373,7 +373,7 @@ def test_qactivation_kwarg(randX_100_10, activation_quantizer, weight_quantizer) )(inputs) model = Model(inputs, outputs) - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') out_dir = str(test_root_path / f'hls4mlprj_qactivation_kwarg_{activation_quantizer}') @@ -418,7 +418,9 @@ def test_quantizer_parsing(randX_100_10, backend, io_type): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<24,8>') + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision='fixed<24,8>', backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_qkeras_quant_parse_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type @@ -459,7 +461,9 @@ def test_qconv2dbn(randX_100_8_8_1, backend, io_type): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<24,8>') + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision='fixed<24,8>', backend=backend + ) output_dir 
= str(test_root_path / f'hls4mlprj_qkeras_qconv2dbn_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type @@ -500,7 +504,9 @@ def test_qdepthwiseconv2d(randX_10_32_32_3, backend, io_type): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<24,8>') + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision='fixed<24,8>', backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_qkeras_qdepthwiseconv2d_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, output_dir=output_dir, backend=backend, io_type=io_type @@ -538,7 +544,7 @@ def test_quantised_po2_bit_width(backend, io_type, strategy): y_keras = keras_model.predict(X) hls_config = hls4ml.utils.config_from_keras_model( - keras_model, granularity='name', default_precision='ap_fixed<64, 32>', default_reuse_factor=1 + keras_model, granularity='name', default_precision='ap_fixed<64, 32>', default_reuse_factor=1, backend=backend ) hls_config['Model']['Strategy'] = strategy output_dir = str(test_root_path / f'hls4mlprj_qkeras_quantised_po2_{backend}_{io_type}_{strategy}') @@ -573,7 +579,9 @@ def test_qsimplernn(backend): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision="ap_fixed<16,1>") + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision="ap_fixed<16,1>", backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_qkeras_qsimplernn_{backend}') hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) hls_model.compile() @@ -607,7 +615,9 @@ def test_qlstm(backend): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision="ap_fixed<8,1>") + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision="ap_fixed<8,1>", backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_qkeras_qsimplernn_{backend}') hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) hls_model.compile() @@ -642,7 +652,9 @@ def test_qgru(backend): ) model.compile() - config = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision="ap_fixed<8,1>") + config = hls4ml.utils.config_from_keras_model( + model, granularity='name', default_precision="ap_fixed<8,1>", backend=backend + ) output_dir = str(test_root_path / f'hls4mlprj_qkeras_qsimplernn_{backend}') hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) hls_model.compile() @@ -671,7 +683,7 @@ def test_qseparableconv1d(backend, io_type): model = Model(inputs=x_in, outputs=x) config = hls4ml.utils.config_from_keras_model( - model, granularity='name', backend=backend, default_precision='fixed<23,7>' + model, granularity='name', backend=backend, default_precision='fixed<23,7>', backend=backend ) # Use 8 bits for input @@ -717,7 +729,7 @@ def test_qseparableconv2d(backend, io_type): model = Model(inputs=x_in, outputs=x) config = hls4ml.utils.config_from_keras_model( - model, granularity='name', backend=backend, default_precision='fixed<23,7>' + model, granularity='name', backend=backend, default_precision='fixed<23,7>', backend=backend ) # 
Use 8 bits for input diff --git a/test/pytest/test_rnn.py b/test/pytest/test_rnn.py index 3e6e978011..4cea296d8d 100644 --- a/test/pytest/test_rnn.py +++ b/test/pytest/test_rnn.py @@ -25,7 +25,7 @@ def test_rnn_parsing(rnn_layer, return_sequences): model = Model(model_input, model_output) model.compile(optimizer='adam', loss='mse') - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vivado') prj_name = f'hls4mlprj_rnn_{rnn_layer.__class__.__name__.lower()}_seq_{int(return_sequences)}' output_dir = str(test_root_path / prj_name) hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir) diff --git a/test/pytest/test_softmax.py b/test/pytest/test_softmax.py index 19c9042465..048b6832ee 100644 --- a/test/pytest/test_softmax.py +++ b/test/pytest/test_softmax.py @@ -41,7 +41,7 @@ def test_softmax(backend, strategy, generate_data, input_bits, input_shape, tabl table_type = f'fixed<{table_bits}, RND, SAT>' - cfg = hls4ml.utils.config_from_keras_model(model, granularity='name') + cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) cfg['LayerName']['softmax']['Strategy'] = strategy cfg['LayerName']['softmax']['inv_table_t'] = table_type cfg['LayerName']['softmax']['exp_table_t'] = table_type @@ -74,7 +74,7 @@ def test_softmax_skipped(backend, io_type): model = tf.keras.models.Sequential([dense, softmax]) model.compile() - cfg = hls4ml.utils.config_from_keras_model(model, granularity='name') + cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) cfg['LayerName']['softmax']['skip'] = True odir = str(test_root_path / 'hls4mlprj_softmax_skipped_{}_{}').format(backend, io_type) diff --git a/test/pytest/test_softsign.py b/test/pytest/test_softsign.py index 31a2a1c2cf..f0089438a4 100644 --- a/test/pytest/test_softsign.py +++ b/test/pytest/test_softsign.py @@ -19,7 +19,7 @@ def test_softsign(backend, input_shape, io_type): model.add(tf.keras.layers.Activation(input_shape=input_shape, activation='softsign', name='softsign')) model.compile() - cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<20,4>') + cfg = hls4ml.utils.config_from_keras_model(model, granularity='name', default_precision='fixed<20,4>', backend=backend) # Since softsign implementation is lookup-based increasing the precision and size of the table helps with accuracy cfg['LayerName']['softsign']['table_t'] = 'fixed<20,4>' cfg['LayerName']['softsign']['table_size'] = 2048 diff --git a/test/pytest/test_trace.py b/test/pytest/test_trace.py index 14e218fd1c..3068c3e5fc 100644 --- a/test/pytest/test_trace.py +++ b/test/pytest/test_trace.py @@ -39,7 +39,7 @@ def test_trace(backend, activation): keras_prediction = model.predict(X_input) - config = hls4ml.utils.config_from_keras_model(model, granularity='name') + config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend=backend) for layer in config['LayerName'].keys(): config['LayerName'][layer]['Trace'] = True diff --git a/test/pytest/test_transpose_concat.py b/test/pytest/test_transpose_concat.py index db3e03125f..7447545d2f 100644 --- a/test/pytest/test_transpose_concat.py +++ b/test/pytest/test_transpose_concat.py @@ -32,7 +32,7 @@ def keras_model(): @pytest.mark.parametrize('backend', ['Vivado', 'Vitis', 'Quartus']) def hls_model(keras_model, backend, io_type): hls_config = hls4ml.utils.config_from_keras_model( 
- keras_model, default_precision='ap_fixed<16,3,AP_RND_CONV,AP_SAT>', granularity='name' + keras_model, default_precision='ap_fixed<16,3,AP_RND_CONV,AP_SAT>', granularity='name', backend=backend ) hls_config['LayerName']['relu']['Precision'] = 'ap_ufixed<17,3>' output_dir = str(test_root_path / f'hls4mlprj_transpose_{backend}_{io_type}') diff --git a/test/pytest/test_upsampling.py b/test/pytest/test_upsampling.py index 9051d582bd..c81be76933 100644 --- a/test/pytest/test_upsampling.py +++ b/test/pytest/test_upsampling.py @@ -56,7 +56,9 @@ def test_upsampling(keras_model_1d, keras_model_2d, data_1d, data_2d, model_type model = keras_model_2d data = data_2d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,1>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,1>', granularity='name', backend=backend + ) odir = str(test_root_path / f'hls4mlprj_upsampling_{model_type}_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, io_type=io_type, output_dir=odir, backend=backend diff --git a/test/pytest/test_zeropadding.py b/test/pytest/test_zeropadding.py index 95f7d79a7d..6a22a22472 100644 --- a/test/pytest/test_zeropadding.py +++ b/test/pytest/test_zeropadding.py @@ -60,7 +60,9 @@ def test_zeropadding(keras_model_1d, keras_model_2d, data_1d, data_2d, model_typ model = keras_model_2d data = data_2d - config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,1>', granularity='name') + config = hls4ml.utils.config_from_keras_model( + model, default_precision='ap_fixed<32,1>', granularity='name', backend=backend + ) odir = str(test_root_path / f'hls4mlprj_zeropadding_{model_type}_{backend}_{io_type}') hls_model = hls4ml.converters.convert_from_keras_model( model, hls_config=config, io_type=io_type, output_dir=odir, backend=backend From 94149608e12f096b6bbc089f9f3bedd42f910ff6 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 2024 17:05:24 -0500 Subject: [PATCH 08/19] fix parameters syntax error introduced in pytest commit --- test/pytest/test_cnn_mnist.py | 2 +- test/pytest/test_qkeras.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pytest/test_cnn_mnist.py b/test/pytest/test_cnn_mnist.py index 47ab27a665..05f02da003 100644 --- a/test/pytest/test_cnn_mnist.py +++ b/test/pytest/test_cnn_mnist.py @@ -68,7 +68,7 @@ def test_mnist_cnn(keras_model, mnist_data, backend, io_type, strategy): x_train, y_train, x_test, y_test = mnist_data hls_config = hls4ml.utils.config_from_keras_model( - keras_model, granularity='name', backend=backend, max_precision='fixed<24,16>', backend=backend + keras_model, granularity='name', backend=backend, max_precision='fixed<24,16>' ) hls_config['Model']['Strategy'] = strategy hls_config['LayerName']['average_pooling2d']['Precision']['accum'] = 'auto' diff --git a/test/pytest/test_qkeras.py b/test/pytest/test_qkeras.py index d3a446ccca..aa0aa8fb2e 100644 --- a/test/pytest/test_qkeras.py +++ b/test/pytest/test_qkeras.py @@ -683,7 +683,7 @@ def test_qseparableconv1d(backend, io_type): model = Model(inputs=x_in, outputs=x) config = hls4ml.utils.config_from_keras_model( - model, granularity='name', backend=backend, default_precision='fixed<23,7>', backend=backend + model, granularity='name', backend=backend, default_precision='fixed<23,7>' ) # Use 8 bits for input From a5a36da864e1981e062ae3a4f51997e6b4e43cb0 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 17 Jul 
2024 22:24:04 -0500 Subject: [PATCH 09/19] add basic type inference for embedding --- .../model/optimizer/passes/infer_precision.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 9bd8f1fb64..4879945d63 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -68,6 +68,9 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Dot']: return self._infer_dot_precision(node, types_to_infer) + if node_class in ['Embedding']: + return self._infer_embedding_precision(node, types_to_infer) + # What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent # this in config_from_* functions @@ -502,3 +505,19 @@ def _infer_dot_precision(self, node, types_to_infer): node.types['result_t'].precision = out_precision return ['result_t'] + + def _infer_embedding_precision(self, node, types_to_infer): + inferred_types = [] + + if 'embeddings_t' in types_to_infer: + self._infer_default_type(node, 'embeddings_t') + node.weights['embeddings'].update_precision(node.types['embeddings_t'].precision) + inferred_types.append('embeddings_t') + + if 'result_t' in types_to_infer: + out_precision = self._get_default_precision(node) + node.types['result_t'].name = node.name + '_result_t' + node.types['result_t'].precision = out_precision + inferred_types.append('result_t') + + return inferred_types From 141cb2bc0b1cfc3d59fadc3ef38c66b9a08f0052 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 12:49:02 -0500 Subject: [PATCH 10/19] add placeholder precision inference for rnn --- hls4ml/model/optimizer/passes/infer_precision.py | 16 ++++++++++++++++ test/pytest/test_rnn.py | 7 ++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 4879945d63..5c1801156f 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -71,6 +71,9 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['Embedding']: return self._infer_embedding_precision(node, types_to_infer) + if node_class in ['SimpleRNN', 'LSTM', 'GRU']: + return self._infer_rnn_precision(node, types_to_infer) + # What about quantized activation layer? Setting it to 'auto' manually will break it here. 
We should prevent # this in config_from_* functions @@ -521,3 +524,16 @@ def _infer_embedding_precision(self, node, types_to_infer): inferred_types.append('result_t') return inferred_types + + # TODO: This is just a placeholder + def _infer_rnn_precision(self, node, types_to_infer): + inferred_types = [] + + # for now just do the weights and leave the rest for the default catch + for weightvar in ('weight', 'bias', 'recurrent_weight', 'recurrent_bias'): + if f'{weightvar}_t' in types_to_infer: + self._infer_default_type(node, f'{weightvar}_t') + node.weights[weightvar].update_precision(node.types[f'{weightvar}_t'].precision) + inferred_types.append(f'{weightvar}_t') + + return inferred_types diff --git a/test/pytest/test_rnn.py b/test/pytest/test_rnn.py index 4cea296d8d..dc991f7f55 100644 --- a/test/pytest/test_rnn.py +++ b/test/pytest/test_rnn.py @@ -90,7 +90,7 @@ def test_rnn_accuracy(rnn_layer, return_sequences, backend, io_type, strategy, s input_shape = (12, 8) X = np.random.rand(50, *input_shape) - 0.5 - layer_name = rnn_layer.__class__.__name__.lower() + layer_name = rnn_layer.__name__ keras_model = Sequential() keras_model.add( rnn_layer( @@ -111,8 +111,9 @@ def test_rnn_accuracy(rnn_layer, return_sequences, backend, io_type, strategy, s ) hls_config['LayerName'][layer_name]['static'] = static hls_config['LayerName'][layer_name]['Strategy'] = strategy - prj_name = 'hls4mlprj_rnn_accuracy_{}_static_{}_ret_seq_{}_{}_{}_{}'.format( - rnn_layer.__class__.__name__.lower(), int(static), int(return_sequences), backend, io_type, strategy + prj_name = ( + f'hls4mlprj_rnn_accuracy_{layer_name}_static_{int(static)}_ret_seq_{int(return_sequences)}_' + f'{backend}_{io_type}_{strategy}' ) output_dir = str(test_root_path / prj_name) From 6d2a5f5ffbd34a8167c382974d2e33450c4b9087 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 13:55:29 -0500 Subject: [PATCH 11/19] fix syntax error in test_qkeras --- test/pytest/test_qkeras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pytest/test_qkeras.py b/test/pytest/test_qkeras.py index aa0aa8fb2e..a6cdaabcac 100644 --- a/test/pytest/test_qkeras.py +++ b/test/pytest/test_qkeras.py @@ -729,7 +729,7 @@ def test_qseparableconv2d(backend, io_type): model = Model(inputs=x_in, outputs=x) config = hls4ml.utils.config_from_keras_model( - model, granularity='name', backend=backend, default_precision='fixed<23,7>', backend=backend + model, granularity='name', backend=backend, default_precision='fixed<23,7>' ) # Use 8 bits for input From bc29e0f1c13d33ba4e5c68d17f8c15918534dbdc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 14:15:46 -0500 Subject: [PATCH 12/19] fix up test_trace --- hls4ml/model/optimizer/__init__.py | 1 + test/pytest/test_trace.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 3aa247d03f..f505cfb089 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,6 +33,7 @@ register_flow( 'convert', [ + 'eliminate_linear_activation', # needs to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', 'remove_transpose_before_flatten', diff --git a/test/pytest/test_trace.py b/test/pytest/test_trace.py index 3068c3e5fc..b01cfcd010 100644 --- a/test/pytest/test_trace.py +++ b/test/pytest/test_trace.py @@ -43,7 +43,7 @@ def test_trace(backend, activation): for layer in config['LayerName'].keys(): config['LayerName'][layer]['Trace'] 
= True - output_dir = str(test_root_path / f'hls4mlprj_trace_{backend}') + output_dir = str(test_root_path / f'hls4mlprj_trace_{backend}_{activation}') hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=output_dir, backend=backend) From e42d0d8c44627657c8e68fea1dad82d0b0160cbc Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 16:36:15 -0500 Subject: [PATCH 13/19] don't pass auto in test_attributes --- test/pytest/test_optimization/test_attributes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/pytest/test_optimization/test_attributes.py b/test/pytest/test_optimization/test_attributes.py index 2669321e09..3ba8d08d14 100644 --- a/test/pytest/test_optimization/test_attributes.py +++ b/test/pytest/test_optimization/test_attributes.py @@ -38,6 +38,12 @@ def test_attributes(): cfg['Model']['Strategy'] = strategy cfg['LayerName']['dense']['ReuseFactor'] = 1 + # optimization doesn't yet support auto precision + for layer in cfg['LayerName'].values(): + for key, prec in layer['Precision'].items(): + if prec == 'auto': + layer['Precision'][key] = default_precision + # Verify correct information for every layer model_attributes = get_attributes_from_keras_model_and_hls4ml_config(model, cfg) assert len(model_attributes) == 4 From 6340655087f7d4ad667cfbe36313addabdb9edfa Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 17:46:14 -0500 Subject: [PATCH 14/19] update documentation --- docs/api/configuration.rst | 38 +++++++++++++++++++++++++++++++++++--- docs/setup.rst | 2 +- docs/status.rst | 2 +- hls4ml/utils/config.py | 3 ++- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/docs/api/configuration.rst b/docs/api/configuration.rst index 091f88e619..72d677d196 100644 --- a/docs/api/configuration.rst +++ b/docs/api/configuration.rst @@ -9,6 +9,7 @@ We currently support two ways of setting hls4ml's model configuration. This page .. contents:: \ +The Python API approach is recommended for most users as there are more utilities to help create the configuration dictionaries. **NOTE:** @@ -16,8 +17,10 @@ We currently support two ways of setting hls4ml's model configuration. This page * One important part of ``hls4ml`` to remember is that the user is responsible for the format of the inputs. There is no automatic formatting or normalization so this must be done in the training. -* +.. + * For developers, you might also want to checkout this section: `Detailed configuration in converted hls codes <#detailed-configuration-in-converted-hls-codes>`_. + *Broken link* ---- @@ -31,11 +34,26 @@ Using hls4ml, you can quickly generate a simple configuration dictionary from a import hls4ml config = hls4ml.utils.config_from_keras_model(model, granularity='model') -For more advanced and detailed configuration, you can also set them through the created dictionary. For example, to change the reuse factor: +This python dictionary can be edited as needed. A more advanced configuration can be generated by, for example: + +.. code-block:: python + + import hls4ml + config = hls4ml.utils.config_from_keras_model( + model, + granularity='name', + default_precision='fixed<16,6>', + backend='Vitis') + +This will include per-layer configuration based on the model. Including the backend is recommended because some configation options depend on the backend. Note, the precisions at the +higher granularites usually default to 'auto', which means that ``hls4ml`` will try to set it automatically. 
Note that higher granularity settings take precendence +over model-level settings. See :py:class:`~hls4ml.utils.config.config_from_keras_model` for more information on the various options. + +One can override specific values before using the configuration: .. code-block:: python - config['Model']['ReuseFactor'] = 2 + config['LayerName']['fc1']['ReuseFactor'] = 2 Or to set the precision of a specific layer's weight: @@ -45,6 +63,20 @@ Or to set the precision of a specific layer's weight: To better understand how the configuration hierachy works, refer to the next section for more details. +Finally, one then uses the configuration to create an hls model: + +.. code-block:: python + + hls_model = hls4ml.converters.convert_from_keras_model( + model, + hls_config=config, + output_dir="my_project_dir", + io_type='io_stream', + backend='Vitis' + ) + +See :py:class:`~hls4ml.converters.convert_from_keras_model` for more information on the various options. + ---- 2. YAML Configuration file diff --git a/docs/setup.rst b/docs/setup.rst index f99b2f2dcb..a735281c3f 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -57,7 +57,7 @@ To run FPGA synthesis, installation of following tools is required: * Xilinx Vivado HLS 2018.2 to 2020.1 for synthesis for Xilinx FPGAs - * Vitis HLS 2022.1 or newer is required for synthesis for Xilinx FPGAs using the experimental ``Vitis`` backend. + * Vitis HLS 2022.2 or newer is required for synthesis for Xilinx FPGAs using the ``Vitis`` backend. * Intel Quartus 20.1 to 21.4 for the synthesis for Intel FPGAs diff --git a/docs/status.rst b/docs/status.rst index e4cac5e735..4ff4d33282 100644 --- a/docs/status.rst +++ b/docs/status.rst @@ -81,7 +81,7 @@ Other feature notes: * ``hls4ml`` is tested on Linux, and supports * Vivado HLS versions 2018.2 to 2020.1 * Intel HLS versions 20.1 to 21.4 - * Vitis HLS versions 2020.2 to 2022.2 (experimentally) + * Vitis HLS versions 2022.2 to 2024.1 * Windows and macOS are not supported * BDT support has moved to the `Conifer `__ package diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index d60478ef97..06db9557b7 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -132,7 +132,8 @@ def config_from_keras_model( will generate config keys for every layer separately, allowing for highly specific configuration tweaks. backend(str, optional): Name of the backend to use - default_precision (str, optional): Default precision to use. Defaults to 'fixed<16,6>'. + default_precision (str, optional): Default precision to use. Defaults to 'fixed<16,6>'. Note, this must + be an explicit precision: 'auto' is not allowed. default_reuse_factor (int, optional): Default reuse factor. Defaults to 1. max_precision (str or None, optional): Maximum width precision to use. Defaults to None, meaning no maximum. Note: Only integer and fixed precisions are supported From 3a2fa002ad6179f866f0e2670bc1a64ff01756c3 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Thu, 18 Jul 2024 17:48:44 -0500 Subject: [PATCH 15/19] update documentation (2) --- hls4ml/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/utils/config.py b/hls4ml/utils/config.py index 06db9557b7..904e0f5886 100644 --- a/hls4ml/utils/config.py +++ b/hls4ml/utils/config.py @@ -132,7 +132,7 @@ def config_from_keras_model( will generate config keys for every layer separately, allowing for highly specific configuration tweaks. backend(str, optional): Name of the backend to use - default_precision (str, optional): Default precision to use. 
Defaults to 'fixed<16,6>'. Note, this must + default_precision (str, optional): Default precision to use. Defaults to 'fixed<16,6>'. Note, this must be an explicit precision: 'auto' is not allowed. default_reuse_factor (int, optional): Default reuse factor. Defaults to 1. max_precision (str or None, optional): Maximum width precision to use. Defaults to None, meaning no maximum. From b718580440aeaec2c350232cd87bc83ea0ac5f38 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Wed, 21 Aug 2024 11:13:59 -0500 Subject: [PATCH 16/19] move some optimizers before infering precision type --- hls4ml/model/optimizer/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 192fdb6d48..e823431699 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,8 +33,14 @@ register_flow( 'convert', [ - 'eliminate_linear_activation', # needs to be before infer_precision_types - 'seperable_to_depthwise_and_conv', # has to be before precision inference + 'fuse_consecutive_batch_normalization', + 'fuse_batch_normalization', + 'eliminate_linear_activation', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', + 'replace_multidimensional_dense_with_conv', + 'seperable_to_depthwise_and_conv', + # The ones above here need to be before infer_precision_types 'infer_precision_types', 'channels_last_converter', 'remove_transpose_before_flatten', @@ -43,10 +49,7 @@ 'fuse_bias_add', 'expand_layer_group', 'output_rounding_saturation_mode', - 'qkeras_factorize_alpha', - 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization', - 'replace_multidimensional_dense_with_conv', 'enforce_proxy_model_embedded_config', ], ) # TODO Maybe not all QKeras optmizers belong here? 
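Editorial illustration (not part of the patch): a user-side sketch of how the reworked defaults are exercised once the 'convert' flow above runs the fusing/conversion passes before precision inference. The Keras `model` object and the specific precision strings are assumptions.

    import hls4ml

    # With 'auto' as the per-layer default, a name-granularity config leaves most
    # precisions unspecified and relies on the infer_precision_types pass.
    config = hls4ml.utils.config_from_keras_model(
        model,                            # an existing Keras model is assumed
        granularity='name',
        backend='Vitis',
        default_precision='fixed<16,6>',  # must be explicit; 'auto' is not allowed here
        max_precision='fixed<24,16>',     # optional cap on the inferred widths
    )
    # Entries such as config['LayerName']['fc1']['Precision']['weight'] now default
    # to 'auto' and are resolved during conversion.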
@@ -55,8 +58,6 @@ 'optimize', [ 'eliminate_linear_activation', - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', 'infer_precision_types', 'set_precision_concat', ], From 09a4d4e1b0907faaa8bec6d7f451ce9ac4593097 Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sat, 24 Aug 2024 16:43:54 -0500 Subject: [PATCH 17/19] move up the channnels_last_converter --- hls4ml/model/optimizer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index e823431699..959945c3ba 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -33,6 +33,7 @@ register_flow( 'convert', [ + 'channels_last_converter', 'fuse_consecutive_batch_normalization', 'fuse_batch_normalization', 'eliminate_linear_activation', @@ -42,7 +43,6 @@ 'seperable_to_depthwise_and_conv', # The ones above here need to be before infer_precision_types 'infer_precision_types', - 'channels_last_converter', 'remove_transpose_before_flatten', 'remove_nop_transpose', 'remove_single_channel_transpose', From 55abefce7d2867c9545b60cb7c9529968f485c8f Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 25 Aug 2024 11:26:32 -0500 Subject: [PATCH 18/19] put missing precision_merge logic in infer_preicion and delete, reorder optimizers --- hls4ml/model/optimizer/__init__.py | 21 ++++------ .../model/optimizer/passes/infer_precision.py | 16 +++++++- .../model/optimizer/passes/precision_merge.py | 40 ------------------- 3 files changed, 22 insertions(+), 55 deletions(-) delete mode 100644 hls4ml/model/optimizer/passes/precision_merge.py diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py index 959945c3ba..77e38b0c5b 100644 --- a/hls4ml/model/optimizer/__init__.py +++ b/hls4ml/model/optimizer/__init__.py @@ -34,32 +34,27 @@ 'convert', [ 'channels_last_converter', - 'fuse_consecutive_batch_normalization', - 'fuse_batch_normalization', - 'eliminate_linear_activation', - 'qkeras_factorize_alpha', - 'extract_ternary_threshold', - 'replace_multidimensional_dense_with_conv', 'seperable_to_depthwise_and_conv', - # The ones above here need to be before infer_precision_types - 'infer_precision_types', 'remove_transpose_before_flatten', 'remove_nop_transpose', 'remove_single_channel_transpose', 'fuse_bias_add', 'expand_layer_group', 'output_rounding_saturation_mode', + 'qkeras_factorize_alpha', + 'extract_ternary_threshold', 'fuse_consecutive_batch_normalization', + 'fuse_batch_normalization', + 'replace_multidimensional_dense_with_conv', 'enforce_proxy_model_embedded_config', + 'eliminate_linear_activation', + # many of the above optimzers need to be done before this + 'infer_precision_types', ], ) # TODO Maybe not all QKeras optmizers belong here? 
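For reference, the clamping that the inference passes apply when a maximum precision is configured (visible in the `_infer_cat_precision` hunk below) can be sketched standalone; `cap_precision` is a made-up helper name used only for illustration.

    from hls4ml.model.types import FixedPrecisionType

    def cap_precision(inferred, maximum):
        # Illustrative only: limit an inferred type to the configured maximum.
        if maximum is None:
            return inferred
        width = min(inferred.width, maximum.width)
        integer = min(inferred.integer, maximum.integer)
        return FixedPrecisionType(width, integer, inferred.signed)

    capped = cap_precision(FixedPrecisionType(32, 12, True), FixedPrecisionType(24, 16, True))
    # capped is a signed fixed<24,12>: the width is clamped, the integer part already fits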
register_flow( 'optimize', - [ - 'eliminate_linear_activation', - 'infer_precision_types', - 'set_precision_concat', - ], + [], requires=['convert'], ) diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 065bfcde31..bb24f2206e 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ b/hls4ml/model/optimizer/passes/infer_precision.py @@ -4,7 +4,14 @@ import numpy as np from hls4ml.model.optimizer import ConfigurableOptimizerPass -from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, PrecisionType, UnspecifiedPrecisionType +from hls4ml.model.types import ( + FixedPrecisionType, + IntegerPrecisionType, + PrecisionType, + RoundingMode, + SaturationMode, + UnspecifiedPrecisionType, +) # TODO: The code assumes everything is Fixed or Integer precision. Need to add checks @@ -481,7 +488,12 @@ def _infer_cat_precision(self, node, types_to_infer): new_width = min(new_width, max_precision.width) new_int = min(new_int, max_precision.integer) - out_precision = FixedPrecisionType(new_width, new_int, new_signed) + # some logic copied from former SetPrecisionConcat optimizer + newrmode = input_1.rounding_mode if input_1.rounding_mode != RoundingMode.TRN else input_2.rounding_mode + newsmode = input_1.saturation_mode if input_1.saturation_mode != SaturationMode.WRAP else input_2.saturation_mode + newsbits = input_1.saturation_bits if input_1.saturation_bits != 0 else input_2.saturation_bits + + out_precision = FixedPrecisionType(new_width, new_int, new_signed, newrmode, newsmode, newsbits) else: out_precision = self._get_default_precision(node) diff --git a/hls4ml/model/optimizer/passes/precision_merge.py b/hls4ml/model/optimizer/passes/precision_merge.py deleted file mode 100644 index 9e79b11000..0000000000 --- a/hls4ml/model/optimizer/passes/precision_merge.py +++ /dev/null @@ -1,40 +0,0 @@ -from hls4ml.model.optimizer import OptimizerPass -from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode - - -def get_concat_type(itype1, itype2): - newwidth = max(itype1.width, itype2.width) - newint = max(itype1.integer, itype2.integer) - if itype1.signed ^ itype2.signed: # XOR - newint += 1 - newwidth += 1 - newrmode = itype1.rounding_mode if itype1.rounding_mode != RoundingMode.TRN else itype2.rounding_mode - newsmode = itype1.saturation_mode if itype1.saturation_mode != SaturationMode.WRAP else itype2.saturation_mode - newsbits = itype1.saturation_bits if itype1.saturation_bits != 0 else itype2.saturation_bits - - newtype = FixedPrecisionType(newwidth, newint, itype1.signed or itype2.signed, newrmode, newsmode, newsbits) - return newtype - - -class SetPrecisionConcat(OptimizerPass): - def match(self, node): - if node.__class__.__name__ == 'Concatenate': - otype = node.get_output_variable().type.precision - itype1 = node.get_input_variable(node.inputs[0]).type.precision - itype2 = node.get_input_variable(node.inputs[1]).type.precision - if isinstance(otype, FixedPrecisionType) and otype != get_concat_type(itype1, itype2): - return True - return False - - def transform(self, model, node): - """ - Set concat output precision - """ - otype = node.get_output_variable().type.precision - itype1 = node.get_input_variable(node.inputs[0]).type.precision - itype2 = node.get_input_variable(node.inputs[1]).type.precision - newtype = get_concat_type(itype1, itype2) - print(f"Found {node.name} in the model, optimizing {otype} to {newtype}...") - node.get_output_variable().type.precision = newtype - - return 
True From 910f81a4680645fdbba54f85d7865818a6ed48be Mon Sep 17 00:00:00 2001 From: Jovan Mitrevski Date: Sun, 25 Aug 2024 17:27:17 -0500 Subject: [PATCH 19/19] add type inference to catapult --- hls4ml/backends/catapult/catapult_backend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hls4ml/backends/catapult/catapult_backend.py b/hls4ml/backends/catapult/catapult_backend.py index 0583e80dab..d939e1f30b 100644 --- a/hls4ml/backends/catapult/catapult_backend.py +++ b/hls4ml/backends/catapult/catapult_backend.py @@ -110,6 +110,7 @@ def _register_flows(self): 'catapult:inplace_stream_flatten', 'catapult:skip_softmax', 'catapult:fix_softmax_table_size', + 'infer_precision_types', ] optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
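As a closing illustration of the SetPrecisionConcat logic that was folded into `_infer_cat_precision` above, the merged output precision of a Concatenate node can be reproduced standalone. This is a sketch only; `concat_precision` is a made-up name, and the example inputs are arbitrary.

    from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode

    def concat_precision(t1, t2):
        # width/integer grow to cover both inputs; one extra bit if only one is signed
        width = max(t1.width, t2.width)
        integer = max(t1.integer, t2.integer)
        if t1.signed != t2.signed:
            width += 1
            integer += 1
        # keep the first non-default rounding/saturation settings, as the removed optimizer did
        rmode = t1.rounding_mode if t1.rounding_mode != RoundingMode.TRN else t2.rounding_mode
        smode = t1.saturation_mode if t1.saturation_mode != SaturationMode.WRAP else t2.saturation_mode
        sbits = t1.saturation_bits if t1.saturation_bits != 0 else t2.saturation_bits
        return FixedPrecisionType(width, integer, t1.signed or t2.signed, rmode, smode, sbits)

    merged = concat_precision(FixedPrecisionType(16, 6, True), FixedPrecisionType(18, 8, False))
    # merged is a signed fixed<19,9>; in _infer_cat_precision this result is additionally
    # clamped by the configured maximum precision before being assigned to the output.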