-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e5cd0c6
Showing
32 changed files
with
4,026 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# ExplainableAI | ||
|
||
Todo: Summarize purpose of package. | ||
|
||
# Modules to add | ||
|
||
* Select a k for sgmm/sbmm models |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
__version__ = "0.0.1" | ||
__all__ = ["metrics", "models", "visual"] |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import numpy as np | ||
def _precision_recall_f1(tp, fp, fn):
    """Return (precision, recall, f1) for the given counts, using 0.0 for 0/0."""
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    denom = precision + recall
    f1 = (2 * precision * recall) / denom if denom else 0.0
    return precision, recall, f1


def optimalTau(probabilities, ylabels, neg_label=-1):
    """Find the probability threshold (tau) that maximises the F1 score.

    The search starts with a threshold just below the smallest probability,
    so every point is predicted as class 1. The points are then visited in
    ascending order of probability; each visited point is moved to the
    negative side and the F1 score is recomputed. The threshold with the
    highest F1 is kept (ties are resolved in favour of the larger threshold).

    Parameters
    ----------
    probabilities : array-like
        Score of each data point for belonging to class 1.
    ylabels : array-like
        True labels, same length as ``probabilities``. Positives are ``1``.
    neg_label : scalar, optional
        Value that marks a negative label. Defaults to ``-1`` (the original
        hard-coded convention); pass ``0`` for 0/1 encoded labels.

    Returns
    -------
    threshold : float
        Best threshold found. A point is on the positive side when its
        probability is strictly greater than this value.
    f1 : float
        F1 score obtained at ``threshold``.
    prob_F1 : numpy.ndarray
        Array of ``[threshold, f1]`` rows, one per evaluated threshold
        (useful for plotting F1 against the threshold).
    prec : float
        Precision at ``threshold``.
    rec : float
        Recall at ``threshold``.

    Raises
    ------
    ValueError
        If the input is empty or the two inputs differ in length.

    Notes
    -----
    Points with identical probabilities are still moved one at a time, so
    the intermediate rows produced inside a group of ties do not correspond
    to a threshold that can actually separate those points.
    """
    probabilities = np.asarray(probabilities)
    ylabels = np.asarray(ylabels)

    if probabilities.size == 0:
        raise ValueError("optimalTau requires at least one probability")
    if len(probabilities) != len(ylabels):
        raise ValueError("probabilities and ylabels must have the same length")

    # Step 1: sort probabilities and labels by ascending probability.
    order = np.argsort(probabilities)
    probabilities1 = probabilities[order]
    ylabels1 = ylabels[order]

    # Initial state: everything is predicted positive, so every positive
    # label is a true positive and every negative label a false positive.
    TP = int(np.count_nonzero(ylabels1 == 1))
    FN = 0
    FP = int(np.count_nonzero(ylabels1 == neg_label))

    precision, recall, f1 = _precision_recall_f1(TP, FP, FN)

    # Best-so-far values start from the initial state so they are always
    # defined, even when no later threshold matches the initial F1.
    threshold = probabilities1.min() - 0.1
    prec, rec = precision, recall
    prob_F1 = [[threshold, f1]]

    for probability, label in zip(probabilities1, ylabels1):
        # Moving this point to the negative side changes exactly one count.
        if label == 1:
            TP -= 1
            FN += 1
        elif label == neg_label:
            FP -= 1

        precision, recall, f1new = _precision_recall_f1(TP, FP, FN)

        # Thresholds with their F1 scores, in case a graph is wanted.
        prob_F1.append([probability, f1new])

        if f1new >= f1:
            threshold = probability
            f1 = f1new
            prec = precision
            rec = recall

    return threshold, f1, np.array(prob_F1), prec, rec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
__version__ = "0.0.1" | ||
|
||
from .metricsFunctions import calc_metrics, CalculateSoftLogReg, optimalTau,metrics_cluster,sgmmResults | ||
|
||
|
||
__all__ = ["calc_metrics", "ftest_logodds", "metricsFunctions", | ||
"TAUOPTIMAL", "utility"] |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import numpy as np | ||
import pandas as pd | ||
#from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator | ||
#import matplotlib.pyplot as plt | ||
|
||
|
||
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing; no-op stand-in for ``warnings.warn``."""
    pass


# Rebind ``warnings.warn`` to the no-op above. Any code that looks up
# ``warnings.warn`` on the module after this line runs will emit nothing.
# NOTE(review): this is a process-wide side effect of importing this module,
# not limited to the functions defined here.
warnings.warn = warn
|
||
def calc_metrics(model=[], cluster=-1, y=[], tau=0.5,
                 custom_prob=[], putModels=0, X=[]):
    """Compute classification metrics for one cluster / data set.

    The list defaults are kept for backward compatibility; none of them is
    mutated by this function.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict_proba``; only used when
        ``putModels`` is non-zero.
    cluster : int
        Cluster identifier, copied unchanged into the returned list.
    y : array-like
        Training or testing labels, encoded as 1 (positive) and 0 (negative).
    tau : float
        Threshold for the probabilities: a point is predicted as class 1
        when its probability is ``>= tau``.
    custom_prob : array-like
        Probabilities, produced by a model, for each data point to belong
        to class 1. Used when ``putModels`` is 0. It is not modified.
    putModels : int
        If non-zero the probabilities are taken from
        ``model.predict_proba(X)[:, 1]``; otherwise ``custom_prob`` is used.
    X : array-like
        Data passed to ``model.predict_proba`` when ``putModels`` is non-zero.

    Returns
    -------
    metrics : list
        ``[cluster, clusterSize, highCostPerc, lowCostPerc, TP, TN, FP, FN,
        FPR, specificity, sensitivity, precision, accuracy, bal_acc, f1,
        auc]``. ``highCostPerc`` / ``lowCostPerc`` are the fractions of
        labels equal to 1 / 0 (high cost / low cost patients).
    roc : object
        Whatever ``roc_curve(y, probabilities)`` returns.
    """
    # NOTE(review): roc_auc_score, roc_curve, precision_score, recall_score,
    # accuracy_score, balanced_accuracy_score and f1_score are not imported
    # in the visible part of this file; presumably they come from
    # sklearn.metrics -- confirm they are in scope.

    # Work on arrays so that the element-wise comparisons below are valid
    # for list inputs as well.
    y = np.asarray(y)

    if putModels != 0:
        probabilities = np.asarray(model.predict_proba(X)[:, 1])
    else:
        probabilities = np.asarray(custom_prob)

    auc = roc_auc_score(y, probabilities)
    roc = roc_curve(y, probabilities)

    # Threshold on tau to get the estimated label of each data point.
    # A new array is built so the caller's probabilities stay untouched.
    predictions = (probabilities >= tau).astype(int)

    # Metrics calculation.
    precision = precision_score(y, predictions)
    sensitivity = recall_score(y, predictions)
    accuracy = accuracy_score(y, predictions)
    bal_acc = balanced_accuracy_score(y, predictions)
    f1 = f1_score(y, predictions)

    clusterSize = len(y)
    highCostPerc = int(np.count_nonzero(y == 1)) / clusterSize
    lowCostPerc = int(np.count_nonzero(y == 0)) / clusterSize

    # Confusion-matrix counts.
    TP = int(np.count_nonzero((y == 1) & (predictions == 1)))
    TN = int(np.count_nonzero((y == 0) & (predictions == 0)))
    FP = int(np.count_nonzero((y == 0) & (predictions == 1)))
    FN = int(np.count_nonzero((y == 1) & (predictions == 0)))

    # Without any negative label the ratio is 0/0; report 0.0 instead of
    # raising ZeroDivisionError.
    specificity = TN / (FP + TN) if (FP + TN) else 0.0
    FPR = 1 - specificity

    # Put all the metrics in a list and return them.
    metrics = [cluster, clusterSize, highCostPerc, lowCostPerc,
               TP, TN, FP, FN,
               FPR, specificity, sensitivity, precision,
               accuracy, bal_acc, f1, auc]

    return metrics, roc
Oops, something went wrong.