diff --git a/lime/discretize.py b/lime/discretize.py
index 41635198..4d9f0daf 100644
--- a/lime/discretize.py
+++ b/lime/discretize.py
@@ -6,6 +6,7 @@ import sklearn.tree
 from sklearn.utils import check_random_state
 from abc import ABCMeta, abstractmethod
+import copy
 
 
 class BaseDiscretizer():
@@ -18,7 +19,8 @@ class BaseDiscretizer():
 
     __metaclass__ = ABCMeta  # abstract class
 
-    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
+    def __init__(self, data, categorical_features, feature_names,
+                 labels=None, scaler=None, random_state=None):
         """Initializer
         Args:
             data: numpy 2d array
@@ -41,7 +43,7 @@ def __init__(self, data, categorical_features, feature_names, labels=None, rando
         self.mins = {}
         self.maxs = {}
         self.random_state = check_random_state(random_state)
-
+        self.scaler = scaler
         # To override when implementing a custom binning
         bins = self.bins(data, labels)
         bins = [np.unique(x) for x in bins]
@@ -50,13 +52,17 @@
             n_bins = qts.shape[0]  # Actually number of borders (= #bins-1)
             boundaries = np.min(data[:, feature]), np.max(data[:, feature])
             name = feature_names[feature]
-
-            self.names[feature] = ['%s <= %.2f' % (name, qts[0])]
+            # ADD: inverse-transform the bin boundaries back to natural (pre-scaling) values
+            qts_cp = copy.deepcopy(qts)
+            if scaler is not None:
+                for i in range(n_bins):
+                    dummy = np.zeros(len(bins))
+                    dummy[feature] = qts[i]
+                    qts_cp[i] = scaler.inverse_transform(dummy)[feature]
+            self.names[feature] = ['%s <= %.2f' % (name, qts_cp[0])]
             for i in range(n_bins - 1):
-                self.names[feature].append('%.2f < %s <= %.2f' %
-                                           (qts[i], name, qts[i + 1]))
-            self.names[feature].append('%s > %.2f' % (name, qts[n_bins - 1]))
-
+                self.names[feature].append('%.2f < %s <= %.2f' % (qts_cp[i], name, qts_cp[i + 1]))
+            self.names[feature].append('%s > %.2f' % (name, qts_cp[n_bins - 1]))
             self.lambdas[feature] = lambda x, qts=qts: np.searchsorted(qts, x)
             discretized = self.lambdas[feature](data[:, feature])
 
@@ -107,7 +113,8 @@ def undiscretize(self, data):
 
             def get_inverse(q):
                 return max(mins[q],
-                           min(self.random_state.normal(means[q], stds[q]), maxs[q]))
+                           min(self.random_state.normal(
+                               means[q], stds[q]), maxs[q]))
             if len(data.shape) == 1:
                 q = int(ret[feature])
                 ret[feature] = get_inverse(q)
@@ -118,11 +125,12 @@
 
 
 class QuartileDiscretizer(BaseDiscretizer):
-    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
+    def __init__(self, data, categorical_features, feature_names,
+                 labels=None, random_state=None, scaler=None):
         BaseDiscretizer.__init__(self, data, categorical_features,
                                  feature_names, labels=labels,
-                                 random_state=random_state)
+                                 random_state=random_state, scaler=scaler)
 
     def bins(self, data, labels):
         bins = []
@@ -133,10 +141,11 @@ def bins(self, data, labels):
 
 
 class DecileDiscretizer(BaseDiscretizer):
-    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
+    def __init__(self, data, categorical_features, feature_names,
+                 labels=None, random_state=None, scaler=None):
         BaseDiscretizer.__init__(self, data, categorical_features,
                                  feature_names, labels=labels,
-                                 random_state=random_state)
+                                 random_state=random_state, scaler=scaler)
 
     def bins(self, data, labels):
         bins = []
@@ -148,13 +157,14 @@ def bins(self, data, labels):
 
 
 class EntropyDiscretizer(BaseDiscretizer):
-    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
+    def __init__(self, data, categorical_features, feature_names,
+                 labels=None, random_state=None, scaler=None):
         if(labels is None):
             raise ValueError('Labels must be not None when using \
                              EntropyDiscretizer')
         BaseDiscretizer.__init__(self, data, categorical_features,
                                  feature_names, labels=labels,
-                                 random_state=random_state)
+                                 random_state=random_state, scaler=scaler)
 
     def bins(self, data, labels):
         bins = []
diff --git a/lime/lime_tabular.py b/lime/lime_tabular.py
index ec741f1f..237667a9 100644
--- a/lime/lime_tabular.py
+++ b/lime/lime_tabular.py
@@ -23,7 +23,7 @@ class TableDomainMapper(explanation.DomainMapper):
     """Maps feature ids to names, generates table views, etc"""
 
     def __init__(self, feature_names, feature_values, scaled_row,
-                 categorical_features, discretized_feature_names=None):
+                 categorical_features, discretized_feature_names=None, outer_scaler=None):
         """Init.
 
         Args:
@@ -39,6 +39,7 @@ def __init__(self, feature_names, feature_values, scaled_row,
         self.scaled_row = scaled_row
         self.all_categorical = len(categorical_features) == len(scaled_row)
         self.categorical_features = categorical_features
+        self.outer_scaler = outer_scaler
 
     def map_exp_ids(self, exp):
         """Maps ids to feature names.
@@ -76,8 +77,14 @@
         weights = [0] * len(self.feature_names)
         for x in exp:
             weights[x[0]] = x[1]
+        # ADD: inverse-transform the displayed values (first six features, assumed continuous) back to natural units
+        outer_scaler = self.outer_scaler
+        outer_value = list(self.feature_values)  # copy, so repeated calls do not mutate self.feature_values
+        if outer_scaler is not None:
+            outer_list = list(map(float, self.feature_values[0:6]))
+            outer_value[0:6] = outer_scaler.inverse_transform(outer_list).round()
         out_list = list(zip(self.exp_feature_names,
-                            self.feature_values,
+                            outer_value,
                             weights))
         if not show_all:
             out_list = [out_list[x[0]] for x in exp]
@@ -110,7 +117,8 @@ def __init__(self,
                  discretize_continuous=True,
                  discretizer='quartile',
                  sample_around_instance=False,
-                 random_state=None):
+                 random_state=None,
+                 outer_scaler=None):
         """Init function.
 
         Args:
@@ -153,7 +161,7 @@
         self.mode = mode
         self.categorical_names = categorical_names or {}
         self.sample_around_instance = sample_around_instance
-
+        self.outer_scaler = outer_scaler
         if categorical_features is None:
             categorical_features = []
         if feature_names is None:
@@ -167,15 +175,15 @@
             if discretizer == 'quartile':
                 self.discretizer = QuartileDiscretizer(
                         training_data, self.categorical_features,
-                        self.feature_names, labels=training_labels)
+                        self.feature_names, labels=training_labels, scaler=self.outer_scaler)
             elif discretizer == 'decile':
                 self.discretizer = DecileDiscretizer(
                         training_data, self.categorical_features,
-                        self.feature_names, labels=training_labels)
+                        self.feature_names, labels=training_labels, scaler=self.outer_scaler)
             elif discretizer == 'entropy':
                 self.discretizer = EntropyDiscretizer(
                         training_data, self.categorical_features,
-                        self.feature_names, labels=training_labels)
+                        self.feature_names, labels=training_labels, scaler=self.outer_scaler)
             elif isinstance(discretizer, BaseDiscretizer):
                 self.discretizer = discretizer
             else:
@@ -262,9 +270,9 @@
             An Explanation object (see explanation.py) with the corresponding
             explanations.
""" + outer_scaler = self.outer_scaler data, inverse = self.__data_inverse(data_row, num_samples) scaled_data = (data - self.scaler.mean_) / self.scaler.scale_ - distances = sklearn.metrics.pairwise_distances( scaled_data, scaled_data[0].reshape(1, -1), @@ -342,7 +350,8 @@ def explain_instance(self, values, scaled_data[0], categorical_features=categorical_features, - discretized_feature_names=discretized_feature_names) + discretized_feature_names=discretized_feature_names, + outer_scaler=outer_scaler) ret_exp = explanation.Explanation(domain_mapper, mode=self.mode, class_names=self.class_names) @@ -370,7 +379,6 @@ def explain_instance(self, num_features, model_regressor=model_regressor, feature_selection=self.feature_selection) - if self.mode == "regression": ret_exp.intercept[1] = ret_exp.intercept[0] ret_exp.local_exp[1] = [x for x in ret_exp.local_exp[0]]