
Commit

add directories
josefigueroa168 committed Feb 18, 2020
0 parents commit e5cd0c6
Showing 32 changed files with 4,026 additions and 0 deletions.
Empty file added LICENSE
Empty file.
7 changes: 7 additions & 0 deletions README.md
@@ -0,0 +1,7 @@
# ExplainableAI

TODO: Summarize the purpose of this package.

# Modules to add

* Select a k for sgmm/sbmm models
Binary file added explainableAI/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions explainableAI/__init__.py
@@ -0,0 +1,2 @@
__version__ = "0.0.1"
__all__ = ["metrics", "models", "visual"]
Binary file added explainableAI/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added explainableAI/metrics/.DS_Store
Binary file not shown.
72 changes: 72 additions & 0 deletions explainableAI/metrics/TAUOPTIMAL.py
@@ -0,0 +1,72 @@
import numpy as np


def optimalTau(probabilities, ylabels):
    """Finds the optimal threshold tau based on the F1 score.

    Sweeps the sorted probabilities as candidate thresholds and keeps
    the one with the highest F1. Labels are expected to be +1 / -1.
    """

    # Step 1: sort probabilities and labels together.
    sortedIndexes = np.argsort(probabilities)
    probabilities1 = probabilities[sortedIndexes]
    ylabels1 = ylabels[sortedIndexes]

    # Start with a threshold below every probability, so each point is
    # predicted as class 1: the true positives are exactly the positive
    # labels, no positive point is classified as negative, and every
    # negative label (-1) is a false positive.
    TP = len(np.where(ylabels1 == 1)[0])
    FN = 0
    FP = len(np.where(ylabels1 == -1)[0])

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = (2 * precision * recall) / (precision + recall)

    threshold = probabilities1.min() - 0.1
    prob_F1 = [[threshold, f1]]  # (threshold, F1) pairs, e.g. for plotting
    prec, rec = precision, recall  # best precision/recall seen so far

    for i, probability in enumerate(probabilities1):

        # Raising the threshold past this point flips its prediction
        # to the negative class: a +1 label turns a true positive into
        # a false negative, a -1 label removes a false positive.
        if ylabels1[i] == 1:
            TP -= 1
            FN += 1

        if ylabels1[i] == -1:
            FP -= 1

        if (TP + FP) == 0:
            precision = 0
        else:
            precision = TP / (TP + FP)

        recall = TP / (TP + FN)

        if (precision + recall) == 0:
            f1new = 0
        else:
            f1new = (2 * precision * recall) / (precision + recall)

        prob_F1.append([probability, f1new])

        if f1new >= f1:
            threshold = probability
            f1 = f1new
            prec = precision
            rec = recall

    return threshold, f1, np.array(prob_F1), prec, rec
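
A minimal usage sketch for the function above on synthetic data; the import path and the sample arrays are illustrative assumptions, and optimalTau expects +1/-1 labels, as the loop's comparisons show:

import numpy as np
from explainableAI.metrics.TAUOPTIMAL import optimalTau  # assumed import path

# Synthetic scores: higher probabilities should correspond to label +1.
probabilities = np.array([0.1, 0.4, 0.35, 0.8, 0.9, 0.65])
ylabels = np.array([-1, -1, 1, 1, 1, -1])  # labels must be +1 / -1

tau, f1, prob_F1, prec, rec = optimalTau(probabilities, ylabels)
print(f"tau={tau:.2f}  F1={f1:.2f}  precision={prec:.2f}  recall={rec:.2f}")
# prob_F1 holds one (threshold, F1) pair per candidate, for plotting.
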
7 changes: 7 additions & 0 deletions explainableAI/metrics/__init__.py
@@ -0,0 +1,7 @@
__version__ = "0.0.1"

from .metricsFunctions import (calc_metrics, CalculateSoftLogReg, optimalTau,
                               metrics_cluster, sgmmResults)

__all__ = ["calc_metrics", "ftest_logodds", "metricsFunctions",
           "TAUOPTIMAL", "utility"]
85 changes: 85 additions & 0 deletions explainableAI/metrics/calc_metrics.py
@@ -0,0 +1,85 @@
import numpy as np
import pandas as pd
from sklearn.metrics import (roc_auc_score, roc_curve, precision_score,
                             recall_score, accuracy_score,
                             balanced_accuracy_score, f1_score)
#from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
#import matplotlib.pyplot as plt


# Silence library warnings by replacing warnings.warn with a no-op.
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn


def calc_metrics(model=[], cluster=-1, y=[], tau=0.5,
                 custom_prob=[], putModels=0, X=[]):
    """
    Computes the metrics of the algorithm:
    accuracy, balanced accuracy, AUC, precision,
    specificity, sensitivity, TP, TN, FP, FN,
    percentage of high-cost patients, and
    percentage of low-cost patients.

    y: training or testing labels
    tau: threshold applied to the probabilities
    custom_prob: probabilities produced by the model, one per data
                 point, giving the probability of belonging to
                 class 1; used when no model is supplied
    putModels: nonzero if a fitted model is supplied to make the
               predictions; 0 if the class-1 probabilities are
               supplied directly via custom_prob
    """
    if putModels != 0:
        probabilities = model.predict_proba(X)[:, 1]
    else:
        probabilities = custom_prob

    auc = roc_auc_score(y, probabilities)
    roc = roc_curve(y, probabilities)

    # Threshold at tau to turn the probabilities into the
    # estimated label for each data point.
    probabilities[np.where(probabilities >= tau)] = 1
    probabilities[np.where(probabilities < tau)] = 0
    predictions = probabilities

    # Metrics calculation
    precision = precision_score(y, predictions)
    sensitivity = recall_score(y, predictions)
    accuracy = accuracy_score(y, predictions)
    bal_acc = balanced_accuracy_score(y, predictions)
    f1 = f1_score(y, predictions)

    clusterSize = len(y)  # cluster size
    highCostPerc = len(np.where(y == 1)[0]) / clusterSize
    lowCostPerc = len(np.where(y == 0)[0]) / clusterSize

    TP = len(np.where((y == 1) * (predictions == 1))[0])
    TN = len(np.where((y == 0) * (predictions == 0))[0])
    FP = len(np.where((y == 0) * (predictions == 1))[0])
    FN = len(np.where((y == 1) * (predictions == 0))[0])

    specificity = TN / (FP + TN)
    FPR = 1 - specificity

    # Collect all the metrics in a list and return them.
    metrics = [cluster, clusterSize, highCostPerc, lowCostPerc,
               TP, TN, FP, FN,
               FPR, specificity, sensitivity, precision,
               accuracy, bal_acc, f1, auc]

    return metrics, roc
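
A minimal usage sketch for the custom-probabilities path (putModels=0); the import path and the sample data are illustrative assumptions, and scikit-learn must be installed for the metric functions:

import numpy as np
from explainableAI.metrics.calc_metrics import calc_metrics  # assumed import path

# Synthetic example: calc_metrics expects 0/1 labels here and one
# class-1 probability per data point.
y = np.array([1, 0, 1, 0, 1, 0])
probs = np.array([0.9, 0.2, 0.7, 0.4, 0.3, 0.1])

metrics, roc = calc_metrics(cluster=0, y=y, tau=0.5, custom_prob=probs)
# metrics = [cluster, clusterSize, highCostPerc, lowCostPerc,
#            TP, TN, FP, FN, FPR, specificity, sensitivity,
#            precision, accuracy, bal_acc, f1, auc]
print("AUC:", metrics[-1])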