
Commit

add directories
josefigueroa168 committed Feb 18, 2020
0 parents commit e5cd0c6
Showing 32 changed files with 4,026 additions and 0 deletions.
Empty file added LICENSE
Empty file.
7 changes: 7 additions & 0 deletions README.md
@@ -0,0 +1,7 @@
# ExplainableAI

TODO: Summarize the purpose of this package.

# Modules to add

* Select a k for sgmm/sbmm models
Binary file added explainableAI/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions explainableAI/__init__.py
@@ -0,0 +1,2 @@
__version__ = "0.0.1"
__all__ = ["metrics", "models", "visual"]
Binary file added explainableAI/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added explainableAI/metrics/.DS_Store
Binary file not shown.
72 changes: 72 additions & 0 deletions explainableAI/metrics/TAUOPTIMAL.py
@@ -0,0 +1,72 @@
import numpy as np


def optimalTau(probabilities, ylabels):
    """Finds the optimal threshold tau based on the F1 score.

    Sweeps the sorted probabilities as candidate thresholds and keeps
    the one with the highest F1. Labels are expected to be +1 / -1.
    """

    # Step 1: sort probabilities and labels together.
    sortedIndexes = np.argsort(probabilities)
    probabilities1 = probabilities[sortedIndexes]
    ylabels1 = ylabels[sortedIndexes]

    # Start with a threshold below every probability, so each point is
    # predicted as class 1: the true positives are exactly the positive
    # labels, no positive point is classified as negative, and every
    # negative label (-1) is a false positive.
    TP = len(np.where(ylabels1 == 1)[0])
    FN = 0
    FP = len(np.where(ylabels1 == -1)[0])

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = (2 * precision * recall) / (precision + recall)

    threshold = probabilities1.min() - 0.1
    prob_F1 = [[threshold, f1]]  # (threshold, F1) pairs, e.g. for plotting
    prec, rec = precision, recall  # best precision/recall seen so far

    for i, probability in enumerate(probabilities1):

        # Raising the threshold past this point flips its prediction
        # to the negative class: a +1 label turns a true positive into
        # a false negative, a -1 label removes a false positive.
        if ylabels1[i] == 1:
            TP -= 1
            FN += 1

        if ylabels1[i] == -1:
            FP -= 1

        if (TP + FP) == 0:
            precision = 0
        else:
            precision = TP / (TP + FP)

        recall = TP / (TP + FN)

        if (precision + recall) == 0:
            f1new = 0
        else:
            f1new = (2 * precision * recall) / (precision + recall)

        prob_F1.append([probability, f1new])

        if f1new >= f1:
            threshold = probability
            f1 = f1new
            prec = precision
            rec = recall

    return threshold, f1, np.array(prob_F1), prec, rec
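
A minimal usage sketch for the function above on synthetic data; the import path and the sample arrays are illustrative assumptions, and optimalTau expects +1/-1 labels, as the loop's comparisons show:

import numpy as np
from explainableAI.metrics.TAUOPTIMAL import optimalTau  # assumed import path

# Synthetic scores: higher probabilities should correspond to label +1.
probabilities = np.array([0.1, 0.4, 0.35, 0.8, 0.9, 0.65])
ylabels = np.array([-1, -1, 1, 1, 1, -1])  # labels must be +1 / -1

tau, f1, prob_F1, prec, rec = optimalTau(probabilities, ylabels)
print(f"tau={tau:.2f}  F1={f1:.2f}  precision={prec:.2f}  recall={rec:.2f}")
# prob_F1 holds one (threshold, F1) pair per candidate, for plotting.
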
7 changes: 7 additions & 0 deletions explainableAI/metrics/__init__.py
@@ -0,0 +1,7 @@
__version__ = "0.0.1"

from .metricsFunctions import (calc_metrics, CalculateSoftLogReg, optimalTau,
                               metrics_cluster, sgmmResults)

__all__ = ["calc_metrics", "ftest_logodds", "metricsFunctions",
           "TAUOPTIMAL", "utility"]
85 changes: 85 additions & 0 deletions explainableAI/metrics/calc_metrics.py
@@ -0,0 +1,85 @@
import numpy as np
import pandas as pd
from sklearn.metrics import (roc_auc_score, roc_curve, precision_score,
                             recall_score, accuracy_score,
                             balanced_accuracy_score, f1_score)
#from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
#import matplotlib.pyplot as plt


# Silence library warnings by replacing warnings.warn with a no-op.
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn


def calc_metrics(model=[], cluster=-1, y=[], tau=0.5,
                 custom_prob=[], putModels=0, X=[]):
    """
    Computes the metrics of the algorithm:
    accuracy, balanced accuracy, AUC, precision,
    specificity, sensitivity, TP, TN, FP, FN,
    percentage of high-cost patients, and
    percentage of low-cost patients.

    y: training or testing labels
    tau: threshold applied to the probabilities
    custom_prob: probabilities produced by the model, one per data
                 point, giving the probability of belonging to
                 class 1; used when no model is supplied
    putModels: nonzero if a fitted model is supplied to make the
               predictions; 0 if the class-1 probabilities are
               supplied directly via custom_prob
    """
    if putModels != 0:
        probabilities = model.predict_proba(X)[:, 1]
    else:
        probabilities = custom_prob

    auc = roc_auc_score(y, probabilities)
    roc = roc_curve(y, probabilities)

    # Threshold at tau to turn the probabilities into the
    # estimated label for each data point.
    probabilities[np.where(probabilities >= tau)] = 1
    probabilities[np.where(probabilities < tau)] = 0
    predictions = probabilities

    # Metrics calculation
    precision = precision_score(y, predictions)
    sensitivity = recall_score(y, predictions)
    accuracy = accuracy_score(y, predictions)
    bal_acc = balanced_accuracy_score(y, predictions)
    f1 = f1_score(y, predictions)

    clusterSize = len(y)  # cluster size
    highCostPerc = len(np.where(y == 1)[0]) / clusterSize
    lowCostPerc = len(np.where(y == 0)[0]) / clusterSize

    TP = len(np.where((y == 1) * (predictions == 1))[0])
    TN = len(np.where((y == 0) * (predictions == 0))[0])
    FP = len(np.where((y == 0) * (predictions == 1))[0])
    FN = len(np.where((y == 1) * (predictions == 0))[0])

    specificity = TN / (FP + TN)
    FPR = 1 - specificity

    # Collect all the metrics in a list and return them.
    metrics = [cluster, clusterSize, highCostPerc, lowCostPerc,
               TP, TN, FP, FN,
               FPR, specificity, sensitivity, precision,
               accuracy, bal_acc, f1, auc]

    return metrics, roc
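
A minimal usage sketch for the custom-probabilities path (putModels=0); the import path and the sample data are illustrative assumptions, and scikit-learn must be installed for the metric functions:

import numpy as np
from explainableAI.metrics.calc_metrics import calc_metrics  # assumed import path

# Synthetic example: calc_metrics expects 0/1 labels here and one
# class-1 probability per data point.
y = np.array([1, 0, 1, 0, 1, 0])
probs = np.array([0.9, 0.2, 0.7, 0.4, 0.3, 0.1])

metrics, roc = calc_metrics(cluster=0, y=y, tau=0.5, custom_prob=probs)
# metrics = [cluster, clusterSize, highCostPerc, lowCostPerc,
#            TP, TN, FP, FN, FPR, specificity, sensitivity,
#            precision, accuracy, bal_acc, f1, auc]
print("AUC:", metrics[-1])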