Fix hybrid bigm formulation for linear trees (#164)

The changes in #163 included changes to the hybrid bigm formulation for linear trees that, while mathematically equivalent, made for a larger formulation in terms of the number of constraints. This PR corrects that: it still uses `gdp.bound_pretransformation` to generate the constraints bounding the feature values for each leaf, but it manually transforms the constraints that set the output value to the leaf's linear function, equivalent to @bammari's original implementation. In addition, it adds a test to check that the size of the resulting formulation is what is expected. **Legal Acknowledgement** By contributing to this software project, I agree my contributions are submitted under the BSD license. I represent I am authorized to make the contributions and grant the license. If my employer has rights to intellectual property that includes these contributions, I represent that I have received permission to make contributions and grant the required license on behalf of that employer. --------- Co-authored-by: Emma Johnson <[email protected]> Co-authored-by: jalving <[email protected]>
cog-imperial · Dec 8, 2024 · e9e34b4 · e9e34b4
1 parent 321a2e2
commit e9e34b4
Show file tree

Hide file tree

Showing 8 changed files with 250 additions and 201 deletions.
diff --git a/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb b/docs/notebooks/neuralnet/graph_neural_network_formulation.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -74,12 +74,12 @@ extend-exclude = ["src/omlt/_version.py"]
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
-  "ANN101",
   "ANN401",
   "COM812",
   "ISC001",
   "SLF001",
   "ARG001",
+  "PLC0206",
   "N803",
   "N806",
   # Remove these after issue https://github.com/cog-imperial/OMLT/issues/153 is fixed.
@@ -96,7 +96,7 @@ ignore = [
   "ANN002",
   "ANN201",
   "ANN202",
-  "ANN204",
+  "ANN204"
 ]
 
 [tool.ruff.lint.pydocstyle]

diff --git a/src/omlt/__init__.py b/src/omlt/__init__.py
@@ -13,7 +13,7 @@
 from omlt.scaling import OffsetScaling
 
 __all__ = [
-    "OmltBlock",
     "OffsetScaling",
+    "OmltBlock",
     "__version__",
 ]
diff --git a/src/omlt/io/__init__.py b/src/omlt/io/__init__.py
@@ -17,11 +17,11 @@
 
 __all__ = [
     "keras_available",
+    "load_keras_sequential",
+    "load_onnx_neural_network",
+    "load_onnx_neural_network_with_bounds",
     "onnx_available",
     "torch_available",
     "torch_geometric_available",
-    "load_onnx_neural_network",
-    "load_onnx_neural_network_with_bounds",
     "write_onnx_model_with_bounds",
-    "load_keras_sequential",
 ]
diff --git a/src/omlt/linear_tree/lt_formulation.py b/src/omlt/linear_tree/lt_formulation.py
@@ -106,6 +106,7 @@ def _build_formulation(self):
             output_vars=self.block.scaled_outputs,
             transformation=self.transformation,
             epsilon=self.epsilon,
+            include_leaf_equalities=True,
         )
 
 
@@ -170,23 +171,51 @@ def _build_formulation(self):
         This method is called by the OmltBlock to build the corresponding
         mathematical formulation on the Pyomo block.
         """
+        block = self.block
+        leaves = self.model_definition.leaves
+
         _setup_scaled_inputs_outputs(
-            self.block,
+            block,
             self.model_definition.scaling_object,
             self.model_definition.scaled_input_bounds,
         )
 
+        input_vars = self.block.scaled_inputs
+
         _add_gdp_formulation_to_block(
-            block=self.block,
+            block=block,
             model_definition=self.model_definition,
-            input_vars=self.block.scaled_inputs,
+            input_vars=input_vars,
             output_vars=self.block.scaled_outputs,
             transformation="custom",
             epsilon=self.epsilon,
+            include_leaf_equalities=False,
         )
 
-        pe.TransformationFactory("gdp.bound_pretransformation").apply_to(self.block)
-        pe.TransformationFactory("gdp.binary_multiplication").apply_to(self.block)
+        pe.TransformationFactory("gdp.bound_pretransformation").apply_to(block)
+        # It doesn't really matter what transformation we call next, so we just
+        # use bigm--all it's going to do is create the exactly-one constraints
+        # and mark all the disjunctive parts of the model as transformed.
+        pe.TransformationFactory("gdp.bigm").apply_to(block)
+
+        # We now create the \sum((a_l^Tx + b_l)*y_l for l in leaves) = d constraints
+        # manually.
+        features = np.arange(0, self.model_definition.n_inputs)
+
+        @block.Constraint(list(leaves.keys()))
+        def linear_constraint(mdl, tree):
+            leaf_ids = list(leaves[tree].keys())
+            return block.intermediate_output[tree] == sum(
+                (
+                    sum(
+                        leaves[tree][leaf]["slope"][feat] * input_vars[feat]
+                        for feat in features
+                    )
+                    + leaves[tree][leaf]["intercept"]
+                )
+                * block.disjunct[tree, leaf].binary_indicator_var
+                for leaf in leaf_ids
+            )
 
 
 def _build_output_bounds(model_def, input_bounds):
@@ -232,7 +261,13 @@ def _build_output_bounds(model_def, input_bounds):
 
 
 def _add_gdp_formulation_to_block(  # noqa: PLR0913
-    block, model_definition, input_vars, output_vars, transformation, epsilon
+    block,
+    model_definition,
+    input_vars,
+    output_vars,
+    transformation,
+    epsilon,
+    include_leaf_equalities,
 ):
     """This function adds the GDP representation to the OmltBlock using Pyomo.GDP.
 
@@ -245,7 +280,9 @@ def _add_gdp_formulation_to_block(  # noqa: PLR0913
         epsilon: Tolerance to use in enforcing that choosing the right
             branch of a linear tree node can only happen if the feature
             is strictly greater than the branch value.
-
+        include_leaf_equalities: boolean to indicate if the formulation
+            should include the equalities setting the leaf values or not.
+            Required: the signature declares no default for this argument.
     """
     leaves = model_definition.leaves
     input_bounds = model_definition.scaled_input_bounds
@@ -283,12 +320,13 @@ def ub_rule(dsj, feat):
 
         dsj.ub_constraint = pe.Constraint(features, rule=ub_rule)
 
-        slope = leaves[tree][leaf]["slope"]
-        intercept = leaves[tree][leaf]["intercept"]
-        dsj.linear_exp = pe.Constraint(
-            expr=sum(slope[k] * input_vars[k] for k in features) + intercept
-            == block.intermediate_output[tree]
-        )
+        if include_leaf_equalities:
+            slope = leaves[tree][leaf]["slope"]
+            intercept = leaves[tree][leaf]["intercept"]
+            dsj.linear_exp = pe.Constraint(
+                expr=sum(slope[k] * input_vars[k] for k in features) + intercept
+                == block.intermediate_output[tree]
+            )
 
     block.disjunct = Disjunct(t_l, rule=disjuncts_rule)
 

diff --git a/src/omlt/neuralnet/__init__.py b/src/omlt/neuralnet/__init__.py
@@ -32,9 +32,9 @@
 )
 
 __all__ = [
-    "NetworkDefinition",
     "FullSpaceNNFormulation",
     "FullSpaceSmoothNNFormulation",
+    "NetworkDefinition",
     "ReducedSpaceNNFormulation",
     "ReducedSpaceSmoothNNFormulation",
     "ReluBigMFormulation",

diff --git a/src/omlt/neuralnet/activations/__init__.py b/src/omlt/neuralnet/activations/__init__.py
@@ -30,16 +30,16 @@
 NON_INCREASING_ACTIVATIONS: list[Any] = []
 
 __all__ = [
-    "linear_activation_constraint",
-    "linear_activation_function",
+    "ACTIVATION_FUNCTION_MAP",
+    "NON_INCREASING_ACTIVATIONS",
     "ComplementarityReLUActivation",
     "bigm_relu_activation_constraint",
+    "linear_activation_constraint",
+    "linear_activation_function",
     "sigmoid_activation_constraint",
     "sigmoid_activation_function",
     "softplus_activation_constraint",
     "softplus_activation_function",
     "tanh_activation_constraint",
     "tanh_activation_function",
-    "ACTIVATION_FUNCTION_MAP",
-    "NON_INCREASING_ACTIVATIONS",
 ]
diff --git a/tests/linear_tree/test_lt_formulation.py b/tests/linear_tree/test_lt_formulation.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pyomo.environ as pe
 import pytest
+from pyomo.common.collections import ComponentSet
+from pyomo.core.expr import identify_variables
 
 from omlt.dependencies import lineartree_available
 
@@ -245,7 +247,7 @@ def test_nonzero_epsilon():
     solution = (pe.value(model_good.x), pe.value(model_good.y))
     y_pred = regr_small.predict(np.array(solution[0]).reshape(1, -1))
     # With epsilon, the model matches the tree prediction
-    assert y_pred[0] == pytest.approx(solution[1])
+    assert y_pred[0] == pytest.approx(solution[1], abs=1e-4)
 
 
 @pytest.mark.skipif(
@@ -657,6 +659,20 @@ def test_hybrid_bigm_formulation_multi_var():
     model1.lt = OmltBlock()
     model1.lt.build_formulation(formulation1_lt)
 
+    num_constraints = 0
+    var_set = ComponentSet()
+    for cons in model1.lt.component_data_objects(pe.Constraint, active=True):
+        num_constraints += 1
+        for v in identify_variables(cons.expr):
+            var_set.add(v)
+
+    num_leaves = len(ltmodel_small.leaves[0])
+    # binary for each leaf + two inputs and an output + 4 scaled input/output vars
+    assert len(var_set) == num_leaves + 3 + 4
+    # 2 bounds constraints for each input, the xor, the output constraint, and
+    # four scaling constraints from OMLT
+    assert num_constraints == 2 * 2 + 1 + 1 + 4
+
     @model1.Constraint()
     def connect_input1(mdl):
         return mdl.x0 == mdl.lt.inputs[0]