-
Notifications
You must be signed in to change notification settings - Fork 0
/
cqqlCFL.py
190 lines (158 loc) · 7.22 KB
/
cqqlCFL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import itertools
import util as ut
class CQQL_Classifier:
@staticmethod
def classify(data, minterms, activeMinterms, threshold):
# number of correctly classified classes
correctlyPredictedClasses = 0
predictedClasses = 0
# object is evaluated over all minterms
for i in range(len(data) - 1):
objectClass = 0
sumDisjunctions = 0
for index, activeMinterm in enumerate(activeMinterms):
if activeMinterm == 1:
minterm = minterms[index]
else:
continue
prod = 1
for j in range(len(data[i]) - 1):
if minterm[j] == 1:
prod *= data[i][j]
elif minterm[j] == 0:
prod *= 1 - data[i][j]
else:
continue
sumDisjunctions += prod
predictedClasses += 1
if sumDisjunctions > threshold:
objectClass = 1
if data[i][len(data[i]) - 1] == objectClass:
correctlyPredictedClasses += 1
accuracy = correctlyPredictedClasses / predictedClasses
return accuracy
@staticmethod
def findThreshold(self, validationData, minterms, activeMinterms):
# split data for calculations
splitData = ut.Util.splitClasses(validationData)
positiveData = splitData[0]
negativeData = splitData[1]
threshold = 0
# find max value of negative training data und min value of positive training data
maxNegative = 0
minPositive = 1
for i in range(len(positiveData) - 1):
sumOfDNF = 0
for index, activeMinterm in enumerate(activeMinterms):
if activeMinterm == 1:
minterm = minterms[index]
else:
continue
prodOfMinterm = 1
for j in range(len(positiveData[i]) - 1):
prodOfMinterm *= positiveData[i][j] if minterm[j] == 1 else (1 - positiveData[i][j])
sumOfDNF += prodOfMinterm
minPositive = sumOfDNF if sumOfDNF < minPositive else minPositive
for i in range(len(negativeData) - 1):
sumOfDNF = 0
for index, activeMinterm in enumerate(activeMinterms):
if activeMinterm == 1:
minterm = minterms[index]
else:
continue
prodOfMinterm = 1
for j in range(len(negativeData[i]) - 1):
prodOfMinterm *= negativeData[i][j] if minterm[j] == 1 else (1 - negativeData[i][j])
sumOfDNF += prodOfMinterm
maxNegative = sumOfDNF if sumOfDNF > maxNegative else maxNegative
# check if maximum value < minimum value(happens if objects are well seperated)
if maxNegative < minPositive:
threshold = (maxNegative + minPositive) / 2
# else we need to maximize the discrete accuracy
# we have to find the threshold value from the interval [minPositive, maxNegative]
# this can be evaluated against a validation set
else:
# set initial values
diff = 1
lowerBoundary = min(minPositive, maxNegative)
upperBoundary = max(minPositive, maxNegative)
middlePoint = (lowerBoundary + upperBoundary) / 2
lower = middlePoint - 0.00000001
upper = middlePoint + 0.00000001
# stop loop as soon as the accuracy doesn't fluctuate more than 0.01 anymore
while diff > 0.01:
accLower = CQQL_Classifier.classify(validationData, minterms, activeMinterms, lower)
accUpper = CQQL_Classifier.classify(validationData, minterms, activeMinterms, upper)
# calculate difference between both accuracies
diff = abs(accLower - accUpper)
# set the better threshold
# create the new intervals
if accLower > accUpper:
threshold = lower
# set new upper limit as the other one isn't needed anymore
# also set new lower and upper
middlePoint = (lower + middlePoint) / 2
lower = middlePoint - 0.001
upper = middlePoint + 0.001
elif accUpper > accLower:
threshold = upper
# set new upper limit as the other one isn't needed anymore
# also set new lower and upper
middlePoint = (middlePoint + upper) / 2
lower = middlePoint - 0.001
upper = middlePoint + 0.001
elif diff < 0.01:
threshold = max(lower, upper)
return threshold
return threshold
# create all minterms
@staticmethod
def createMinterms(size: int):
minterms = list(itertools.product([0, 1], repeat=size - 1))
return minterms
# extract all minterms that fullfill the condition that leads to class 1
def extractMinterms(self, trainingData, minterms, theta_p_threshold):
# split data for calculations
splitData = ut.Util.splitClasses(trainingData)
positiveData = splitData[0]
negativeData = splitData[1]
numberOfClassOne = len(positiveData)
numberOfClassZero = len(negativeData)
# active/inactive minterm list
activeList = []
# iterate over every minterm to decide if it should be removed
for minterm in minterms:
sumPos = 0
sumNeg = 0
# sum all products of each object where class is 1
for i in range(len(positiveData) - 1):
prod = 1
for j in range(len(positiveData[i]) - 1):
prod *= positiveData[i][j] if minterm[j] == 1 else (1 - positiveData[i][j])
sumPos += prod
# sum all products of each object where class is 0
for i in range(len(negativeData) - 1):
prod = 1
for j in range(len(negativeData[i - 1]) - 1):
prod *= negativeData[i][j] if minterm[j] == 1 else (1 - negativeData[i][j])
sumNeg += prod
gamma = numberOfClassZero / (numberOfClassOne + numberOfClassZero)
lambdA = 0.5
# check if the classes are evenly distributed
if gamma == 1 / 2:
# if the sum of positive training data is greater than negative training data:
# remove the minterm from the list as it is unneeded
if sumPos > sumNeg:
activeList.append(1)
else:
activeList.append(0)
# if unevenly distributed, chose this
else:
# lambda value chosen arbitrarly
stability_threshold = (gamma * lambdA * sumPos) / (
gamma * lambdA * sumPos + (1 - gamma) * (1 - lambdA) * sumNeg)
if stability_threshold > theta_p_threshold:
activeList.append(1)
else:
activeList.append(0)
return activeList