From db91cd9a8ded637a0a90a9ee6f5c666407fcde0b Mon Sep 17 00:00:00 2001 From: bammari Date: Wed, 4 Oct 2023 10:25:18 -0400 Subject: [PATCH 1/7] Initial commit to allow for linear tree classifiers --- src/omlt/linear_tree/lt_definition.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index e45274fd..b78bbd4c 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -297,8 +297,8 @@ def _parse_tree_data(model, input_bounds): # keys in the splits dictionary for leaf in leaves: del splits[leaf] - leaves[leaf]["slope"] = list(leaves[leaf]["models"].coef_) - leaves[leaf]["intercept"] = leaves[leaf]["models"].intercept_ + leaves[leaf]["slope"] = list(leaves[leaf]["models"].coef_.reshape((-1,))) + leaves[leaf]["intercept"] = leaves[leaf]["models"].intercept_.reshape((-1,))[0] # This loop creates an parent node id entry for each node in the tree for split in splits: @@ -375,6 +375,8 @@ def _parse_tree_data(model, input_bounds): for leaf in leaves: leaves[leaf]["bounds"][feat] = [None, None] + import pprint + pprint.pprint(leaves) # Finally, go through each split and assign it's threshold value as the # upper bound to all the leaves descending to the left of the split and # as the lower bound to all the leaves descending to the right. 
From 8395aaad830f0c62e65667bba649fe36cebaa95b Mon Sep 17 00:00:00 2001 From: bammari Date: Wed, 4 Oct 2023 10:37:17 -0400 Subject: [PATCH 2/7] Handle classification tasks --- src/omlt/linear_tree/lt_definition.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index b78bbd4c..0431696e 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -266,7 +266,7 @@ def _parse_tree_data(model, input_bounds): # Include checks to ensure that the input dict is the model summary which # is obtained by calling the summary() method contained within the # linear-tree package (e.g. dict = model.summary()) - if isinstance(model, lineartree.lineartree.LinearTreeRegressor) is True: + if isinstance(model, lineartree.lineartree.LinearTreeRegressor) is True or isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True: leaves = model.summary(only_leaves=True) splits = model.summary() elif isinstance(model, dict) is True: @@ -375,8 +375,7 @@ def _parse_tree_data(model, input_bounds): for leaf in leaves: leaves[leaf]["bounds"][feat] = [None, None] - import pprint - pprint.pprint(leaves) + # Finally, go through each split and assign it's threshold value as the # upper bound to all the leaves descending to the left of the split and # as the lower bound to all the leaves descending to the right. 
From 9a559bbf5cc38f056bbcbdeff68a8f4c4bbeef56 Mon Sep 17 00:00:00 2001 From: bammari Date: Mon, 16 Oct 2023 20:03:03 -0400 Subject: [PATCH 3/7] Accounting for DummyClassifiers in leaves --- src/omlt/linear_tree/lt_definition.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index 0431696e..3b9a6c92 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -1,5 +1,6 @@ import numpy as np import lineartree +import sklearn class LinearTreeDefinition: @@ -294,11 +295,24 @@ def _parse_tree_data(model, input_bounds): raise TypeError("Model entry must be dict or linear-tree instance") # This loop adds keys for the slopes and intercept and removes the leaf - # keys in the splits dictionary + # keys in the splits dictionary. For LinearTreeClassifier, check if + # the model in the leaf is a DummyClassifier. If so, use the information + # in the prior to determine whether the intercept is 1, or -1. 
Otherwise + # use the slope/intercept information in the RidgeClassifier or + # LinearTreeRegressor classes for leaf in leaves: del splits[leaf] - leaves[leaf]["slope"] = list(leaves[leaf]["models"].coef_.reshape((-1,))) - leaves[leaf]["intercept"] = leaves[leaf]["models"].intercept_.reshape((-1,))[0] + model_in_leaf = leaves[leaf]["models"] + if isinstance(model_in_leaf, sklearn.dummy.DummyClassifier): + prior = model_in_leaf.class_prior_ + leaves[leaf]["slope"] = list(np.zeros(len(input_bounds.keys()))) + if prior[0] <= prior[1]: + leaves[leaf]["intercept"] = -1 + else: + leaves[leaf]["intercept"] = 1 + else: + leaves[leaf]["slope"] = list(model_in_leaf.coef_.reshape((-1,))) + leaves[leaf]["intercept"] = model_in_leaf.intercept_.reshape((-1,))[0] # This loop creates an parent node id entry for each node in the tree for split in splits: From 2f5dfd309e7081cf9652cba4b25a5bfadfd7046b Mon Sep 17 00:00:00 2001 From: bammari Date: Mon, 16 Oct 2023 20:19:49 -0400 Subject: [PATCH 4/7] Correct prior values --- src/omlt/linear_tree/lt_definition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index 3b9a6c92..d3b5b998 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -307,9 +307,9 @@ def _parse_tree_data(model, input_bounds): prior = model_in_leaf.class_prior_ leaves[leaf]["slope"] = list(np.zeros(len(input_bounds.keys()))) if prior[0] <= prior[1]: - leaves[leaf]["intercept"] = -1 - else: leaves[leaf]["intercept"] = 1 + else: + leaves[leaf]["intercept"] = -1 else: leaves[leaf]["slope"] = list(model_in_leaf.coef_.reshape((-1,))) leaves[leaf]["intercept"] = model_in_leaf.intercept_.reshape((-1,))[0] From 03b126a0b90e88ca742786e0652f3fab3a80c25f Mon Sep 17 00:00:00 2001 From: bammari Date: Mon, 16 Oct 2023 22:19:55 -0400 Subject: [PATCH 5/7] Accounting for single class dummy in leaf --- src/omlt/linear_tree/lt_definition.py | 35 
+++++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index d3b5b998..4f031c59 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -302,17 +302,36 @@ def _parse_tree_data(model, input_bounds): # LinearTreeRegressor classes for leaf in leaves: del splits[leaf] - model_in_leaf = leaves[leaf]["models"] - if isinstance(model_in_leaf, sklearn.dummy.DummyClassifier): - prior = model_in_leaf.class_prior_ + if isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True: + num_classes = len(leaves[leaf]['classes']) + else: + num_classes = 100 + + if num_classes < 2: + class_val = int(leaves[leaf]['classes'][0]) leaves[leaf]["slope"] = list(np.zeros(len(input_bounds.keys()))) - if prior[0] <= prior[1]: - leaves[leaf]["intercept"] = 1 - else: + if class_val == 0: leaves[leaf]["intercept"] = -1 + else: + leaves[leaf]["intercept"] = 1 else: - leaves[leaf]["slope"] = list(model_in_leaf.coef_.reshape((-1,))) - leaves[leaf]["intercept"] = model_in_leaf.intercept_.reshape((-1,))[0] + model_in_leaf = leaves[leaf]["models"] + if isinstance(model_in_leaf, sklearn.dummy.DummyClassifier): + prior = model_in_leaf.class_prior_ + leaves[leaf]["slope"] = list(np.zeros(len(input_bounds.keys()))) + if len(prior) < 2: + pred_val = int(model_in_leaf.predict([0])[0]) + if pred_val == 0: + leaves[leaf]["intercept"] = -1 + else: + leaves[leaf]["intercept"] = 1 + elif prior[0] <= prior[1]: + leaves[leaf]["intercept"] = 1 + else: + leaves[leaf]["intercept"] = -1 + else: + leaves[leaf]["slope"] = list(model_in_leaf.coef_.reshape((-1,))) + leaves[leaf]["intercept"] = model_in_leaf.intercept_.reshape((-1,))[0] # This loop creates an parent node id entry for each node in the tree for split in splits: From ff91cc6e07c2db76784c4a1edc3f452d0e5a5288 Mon Sep 17 00:00:00 2001 From: bammari Date: Mon, 16 Oct 2023 22:23:19 -0400 Subject: [PATCH 
6/7] Docstring update --- src/omlt/linear_tree/lt_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index 4f031c59..1c327084 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -305,7 +305,7 @@ def _parse_tree_data(model, input_bounds): if isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True: num_classes = len(leaves[leaf]['classes']) else: - num_classes = 100 + num_classes = 999 if num_classes < 2: class_val = int(leaves[leaf]['classes'][0]) From fd1bf7dc0269a8dc2968b19a11774eacd15bed88 Mon Sep 17 00:00:00 2001 From: bammari Date: Tue, 31 Oct 2023 11:21:52 -0400 Subject: [PATCH 7/7] Linting --- src/omlt/linear_tree/lt_definition.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/omlt/linear_tree/lt_definition.py b/src/omlt/linear_tree/lt_definition.py index 1c327084..74882666 100644 --- a/src/omlt/linear_tree/lt_definition.py +++ b/src/omlt/linear_tree/lt_definition.py @@ -267,7 +267,10 @@ def _parse_tree_data(model, input_bounds): # Include checks to ensure that the input dict is the model summary which # is obtained by calling the summary() method contained within the # linear-tree package (e.g. dict = model.summary()) - if isinstance(model, lineartree.lineartree.LinearTreeRegressor) is True or isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True: + if ( + isinstance(model, lineartree.lineartree.LinearTreeRegressor) is True + or isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True + ): leaves = model.summary(only_leaves=True) splits = model.summary() elif isinstance(model, dict) is True: @@ -298,17 +301,17 @@ def _parse_tree_data(model, input_bounds): # keys in the splits dictionary. For LinearTreeClassifier, check if # the model in the leaf is a DummyClassifier. 
If so, use the information # in the prior to determine whether the intercept is 1, or -1. Otherwise - # use the slope/intercept information in the RidgeClassifier or - # LinearTreeRegressor classes + # use the slope/intercept information in the RidgeClassifier or + # LinearTreeRegressor classes for leaf in leaves: del splits[leaf] if isinstance(model, lineartree.lineartree.LinearTreeClassifier) is True: - num_classes = len(leaves[leaf]['classes']) + num_classes = len(leaves[leaf]["classes"]) else: num_classes = 999 - + if num_classes < 2: - class_val = int(leaves[leaf]['classes'][0]) + class_val = int(leaves[leaf]["classes"][0]) leaves[leaf]["slope"] = list(np.zeros(len(input_bounds.keys()))) if class_val == 0: leaves[leaf]["intercept"] = -1 @@ -408,7 +411,6 @@ def _parse_tree_data(model, input_bounds): for leaf in leaves: leaves[leaf]["bounds"][feat] = [None, None] - # Finally, go through each split and assign it's threshold value as the # upper bound to all the leaves descending to the left of the split and # as the lower bound to all the leaves descending to the right.