Merge pull request #37 from oxfordinternetinstitute/frontier

Enable Frontier Extraction
oxfordinternetinstitute · Aug 19, 2024 · 06cda2c · 06cda2c
2 parents 4abc17b + 8ea1e23
commit 06cda2c
Show file tree

Hide file tree

Showing 9 changed files with 528 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ OxonFair is an expressive toolkit designed to enforce a wide-range of fairness d
 The toolkit is designed to overcome a range of shortcomings in existing fairness toolkits for high-capacity models that overfit to the training data.
 It is designed and works for computer vision and NLP problems alongside tabular data.
 
-Check out the colab demo [here](https://colab.research.google.com/drive/1CfcS3AX7M2MO1wW33wU1LDiY5DwtyyxH?usp=sharing).
+Check out the colab demo [here](https://colab.research.google.com/drive/1CfcS3AX7M2MO1wW33wU1LDiY5DwtyyxH?usp=sharing) or read the [preprint](https://arxiv.org/abs/2407.13710).
 
 For low-capacity models (e.g., logistic regression over a small number of variables, and decision-trees of limited depth), we recommend [fairlearn](https://github.com/fairlearn/fairlearn).
 
@@ -20,7 +20,7 @@ In the terminal type:
 
     pip install 'oxonfair[full]'
 
-This will download and install enough code to run any notebooks except those comparing with fairlearn. This includes autogluon, pytorch, and XGBoost. If this is too many dependancies, try a minimal install.
+This will download and install enough code to run any notebooks except those comparing with fairlearn. This includes autogluon, pytorch, and XGBoost. If this is too many dependencies, try a minimal install.
 
 ### Minimal install
 

diff --git a/examples/README.md b/examples/README.md
@@ -11,3 +11,4 @@ This folder contains a collection of example ipython notebooks illustrating diff
     a. A comparison using random forests and decision trees on the adult dataset. [Here](adult_fairlearn_comparision.ipynb)
     b. A comparison using xgboost on medical data. [Here](high-dim_fairlearn_comparision.ipynb)
     c. A comparison of run time using xgboost on multiple groups. [Here](multi_group_fairlearn_comparision.ipynb)
+7. A comparison with [Fairret](./fairret.ipynb).
diff --git a/examples/fairret.ipynb b/examples/fairret.ipynb
diff --git a/examples/quickstart_DeepFairPredictor_computer_vision.ipynb b/examples/quickstart_DeepFairPredictor_computer_vision.ipynb
diff --git a/examples/rate_constrained.ipynb b/examples/rate_constrained.ipynb
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 
 FAIR = "oxonfair"
 
-version = "0.2.1"
+version = "0.2.1.5"
 
 PYTHON_REQUIRES = ">=3.8"
 
@@ -110,9 +110,9 @@ def default_setup_args(*, version):
 
 extras_require = dict()
 full_requirements = ['matplotlib', 'autogluon.tabular', 'torch', 'xgboost', 'jupyterlab']
-notebook_requirements = full_requirements + ['fairlearn']
-test_requirements = notebook_requirements + ["tox", "pytest", "pytest-cov", 'flake8',
-                                             'linkcheckmd', 'ipynbcompress',]
+notebook_requirements = full_requirements + ['fairlearn', 'fairret']
+test_requirements = notebook_requirements + ["tox", "pytest", "pytest-cov", 'flake8', 'tabulate',
+                                             'linkcheckmd', 'ipynbcompress', 'nbmake']
 
 full_requirements = list(set(full_requirements))
 notebook_requirements = list(set(notebook_requirements))

diff --git a/src/oxonfair/learners/fair.py b/src/oxonfair/learners/fair.py
@@ -394,6 +394,22 @@ def call_fast(grid_width=grid_width):
         else:
             self.frontier = call_slow()
 
+    def frontier_thresholds(self):
+        """Return the thresholds corresponding to the found frontier.
+
+        These are element 1 of ``self.frontier``; fit must have been called first.
+        """
+        found = self.frontier
+        assert found, "Call fit before frontier_thresholds"
+        return found[1]
+
+    def frontier_scores(self):
+        """Return the scores (i.e. objective and constraint) of the found frontier.
+
+        These are element 0 of ``self.frontier``; fit must have been called first.
+        """
+        found = self.frontier
+        assert found, "Call fit before frontier_scores"
+        return found[0]
+
+    def set_threshold(self, threshold):
+        """Manually override the thresholds found by fit.
+
+        Stores ``threshold`` in ``self.offset`` so that a different trade-off
+        can be enforced.
+        """
+        self.offset = threshold
+
     def plot_frontier(self, data=None, groups=None, *, objective1=False, objective2=False,
                       show_updated=True, show_original=True, color=None, new_plot=True, prefix='',
                       name_frontier='Frontier', subfig=None) -> None:

diff --git a/tests/test_check_style.py b/tests/test_check_style.py
@@ -62,7 +62,8 @@ def test_md_links():
 
 def test_run_notebooks_without_errors():
     "run pip install nbmake before running this test"
-    check_call(['pytest', '--nbmake', '-n=auto', '--nbmake-timeout=500', 'examples'])
+    for file in glob.glob('./examples/*.ipynb'):
+        check_call(['pytest', '--nbmake',  '--nbmake-timeout=500', file])
 
 
 def test_compress_notebooks():

diff --git a/tests/unittests/test_frontier.py b/tests/unittests/test_frontier.py
@@ -0,0 +1,76 @@
+"""Tests for Frontier"""
+
+import pandas as pd
+import sklearn.ensemble
+import sklearn.tree
+import oxonfair as fair
+from oxonfair.utils import group_metrics as gm
+
+# Record whether matplotlib is importable.
+try:
+    import matplotlib.pyplot as plt
+    plt.title  # bare reference to the imported name
+except ModuleNotFoundError:
+    PLT_EXISTS = False
+else:
+    PLT_EXISTS = True
+
+# Classifier used as the base predictor for the frontier tests.
+classifier_type = sklearn.ensemble.RandomForestClassifier
+
+train_data = pd.read_csv("https://autogluon.s3.amazonaws.com/datasets/Inc/train.csv")
+test_data = pd.read_csv("https://autogluon.s3.amazonaws.com/datasets/Inc/test.csv")
+
+# Merge and shuffle the data.
+# ignore_index=True gives the merged frame a unique 0..n-1 index. Without it the
+# row labels of the two files overlap, so the label-based drop() calls below
+# remove extra rows and the positional y.iloc[...] lookups take targets from
+# the wrong rows.
+total_data = pd.concat([train_data, test_data], ignore_index=True)
+y = total_data["class"] == " <=50K"
+total_data = total_data.drop(columns="class")
+total_data = pd.get_dummies(total_data)
+
+# Half of the rows go to train; the remainder is split 40/60 into val and test.
+train = total_data.sample(frac=0.5)
+val_test = total_data.drop(train.index)
+train_y = y.iloc[train.index]
+val_test_y = y.drop(train_y.index)
+val = val_test.sample(frac=0.4)
+test = val_test.drop(val.index)
+val_y = y.iloc[val.index]
+test_y = val_test_y.drop(val.index)
+predictor = classifier_type()
+predictor.fit(train, train_y)
+
+train_dict = {"data": train, "target": train_y}
+val_dict = {"data": val, "target": val_y}
+test_dict = {"data": test, "target": test_y}
+
+# DataDict variants that additionally receive the 'sex_ Female' column.
+val_dict_g = fair.DataDict(val_y, val, val['sex_ Female'])
+test_dict_g = fair.DataDict(test_y, test, test['sex_ Female'])
+
+
+def test_recall_diff(use_fast=True):
+    """Sweep out the found frontier for equal opportunity and check for consistency.
+
+    Each frontier threshold is applied in turn and the re-evaluated metrics are
+    compared with the scores stored for that frontier point.
+    """
+    fpredictor = fair.FairPredictor(predictor, test_dict, "sex_ Female", use_fast=use_fast)
+    fpredictor.fit(gm.accuracy, gm.recall.diff, 0.025)
+
+    # Evaluate the change in fairness (recall difference corresponds to EO)
+    measures = fpredictor.evaluate_fairness(verbose=False)
+
+    thresholds = fpredictor.frontier_thresholds()
+    frontier = fpredictor.frontier_scores()
+    metrics = {1: fpredictor.objective1, 2: fpredictor.objective2}
+    # NOTE(review): the loop length is thresholds.shape[1], while the non-fast
+    # branch indexes the last axis -- confirm this visits every frontier point.
+    for i in range(thresholds.shape[1]):
+        # Only use_fast being exactly True takes the 2-D indexing path.
+        candidate = thresholds[:, i] if use_fast is True else thresholds[:, :, i]
+        fpredictor.set_threshold(candidate)
+        expected = frontier[:, i]
+        measures = fpredictor.evaluate(metrics=metrics, verbose=False)['updated']
+        assert measures[1] == expected[0]
+        assert measures[2] == expected[1]
+
+
+def test_recall_diff_slow():
+    """Run the frontier consistency check through the slow pathway."""
+    test_recall_diff(use_fast=False)
+
+
+def test_recall_diff_hybrid():
+    """Run the frontier consistency check with use_fast='hybrid'."""
+    test_recall_diff(use_fast='hybrid')