-
Notifications
You must be signed in to change notification settings - Fork 0
/
temp17.py
331 lines (259 loc) · 14.9 KB
/
temp17.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import datetime
import itertools
import random
#This program takes the measured width of each character in the alphabet (lower case plus capitals), then generates
#every possible combination of characters and checks whether each one fits the width of a redacted area of text.
#An accepted combination (one with the correct width) that contains spaces will not match a single dictionary
#word, so such strings are added to a separate list, split on spaces,
#and each part is then run through the dictionary.
#PS: I'd prefer it if you kept this to yourself for now, as I want to write a paper on it
#:)
#00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Open data files
#Both files are read completely into lists of lines. The context managers close
#each handle as soon as its lines have been read, instead of leaving it open
#for the rest of the run.
with open("ConfigTest2.txt", "r") as TextFile_CharacterData_EntireFile:
    Lst_CharacterData = TextFile_CharacterData_EntireFile.readlines()
with open("github_english_words_master.txt", "r") as TextFile_Dictionary_EntireFile:
    Lst_DictionarySearch = TextFile_Dictionary_EntireFile.readlines()
#End
#00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#1) Split the constants line (line 1 of the config file) on semi-colons
#2) Declare constants
#The first four fields are read, in this order:
#nCharacters;min_nSamples_perCharacter;max_RSD_perCharacter;Width_RedactedText
_constant_fields = Lst_CharacterData[1].split(";")
if len(_constant_fields) < 4:
    #Fail loudly rather than silently slicing the wrong part of the line
    raise ValueError(
        "Constants line must hold at least 4 semi-colon separated values, got: "
        + repr(Lst_CharacterData[1])
    )
nCharacters = int(_constant_fields[0])
min_nSamples_perCharacter = int(_constant_fields[1])
max_RSD_perCharacter = int(_constant_fields[2])
Width_RedactedText = int(_constant_fields[3])
#Alias for code that refers to the character count as const_nCharacters
const_nCharacters = nCharacters
#The analysis uses the width read from the config file (this used to be a temporary hard-coded 55)
const_Width_RedactedText = Width_RedactedText
#00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Prepare lists and tuples for analysis
Lst_CharacterData_LengthOf = len(Lst_CharacterData)
Lst_DictionarySearch_LengthOf = len(Lst_DictionarySearch)
#Strip the new line character from EVERY line of both lists.
#The character rows are read from line 3 onwards, so stripping only the first
#nCharacters config lines would leave the final rows with their "\n" attached.
Lst_CharacterData[:] = [str(line).strip("\n") for line in Lst_CharacterData]
Lst_DictionarySearch[:] = [str(line).strip("\n") for line in Lst_DictionarySearch]
#Result lists filled in by the analysis sections further down
Lst_PossibleAnswers = []
Lst_PossibleAnswers2 = []
Lst_Rejections = []
#One accumulator per semi-colon separated column of a character row
tempList0 = [] #character
tempList1 = [] #average width (integer)
tempList2 = [] #number of samples (integer)
tempList3 = [] #RSD (integer)
tempList4 = [] #sample list, kept as text
#Character rows start at line 3 of Lst_CharacterData
for row_number in range(3, nCharacters + 3):
    fields = str(Lst_CharacterData[row_number]).split(";")
    tempList0.append(fields[0])
    #Columns 1 to 3 are numeric and converted to int
    for column, field_index in ((tempList1, 1), (tempList2, 2), (tempList3, 3)):
        column.append(int(fields[field_index]))
    tempList4.append(fields[4])
#Freeze every column into a tuple
tpl_iCharacter = tuple(tempList0)
tpl_iCharAvrgWidth = tuple(tempList1)
tpl_iCharacter_nSample = tuple(tempList2)
tpl_iCharacter_RSD = tuple(tempList3)
tpl_iCharacter_Samples = tuple(tempList4)
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Program start
#Print the banner with the current local time and date (previously the literal text "ERROR")
_run_started = datetime.datetime.now()
print("===============================================================================")
print("SYSTEM TIME: " + _run_started.strftime("%H:%M:%S"))
print("SYSTEM DATE: " + _run_started.strftime("%Y-%m-%d"))
print("")
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Check redacted text status
#A positive width counts as present, zero as missing, and a negative width as an error
if Width_RedactedText == 0:
    width_status_line = "Redacted text width value: Value missing"
elif Width_RedactedText < 0:
    width_status_line = "Redacted text width value: ERROR"
else:
    width_status_line = "Redacted text width value: Value present (" + str(Width_RedactedText) + ")"
print(width_status_line)
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Check sample n values
#Count how many characters have a) fewer samples than the configured minimum, and b) no samples at all.
#The two counts are taken independently: a character with zero samples is also below any
#positive minimum, so chaining the tests with elif would never count it as a zero value.
LowValues_SampleNo_count = sum(1 for n_samples in tpl_iCharacter_nSample if n_samples < min_nSamples_perCharacter)
ZeroValues_SampleNo_count = sum(1 for n_samples in tpl_iCharacter_nSample if n_samples == 0)
Stat_Smpl_cnt_resp1 = "At least one character has less samples than specified (" + str(min_nSamples_perCharacter) + ")"
Stat_Smpl_cnt_resp2 = "All characters have acceptable sample numbers"
Stat_Smpl_cnt_resp3 = "At least one character has no samples"
Stat_Smpl_cnt_resp4 = "All characters have at least one sample"
Stat_Smpl_cnt_resp5 = "ERROR" #no longer printed; the all-clear case now reports responses 2 and 4
#First line: low sample numbers
if LowValues_SampleNo_count > 0:
    print("Sample number check: " + Stat_Smpl_cnt_resp1)
else:
    print("Sample number check: " + Stat_Smpl_cnt_resp2)
#Second line: missing samples
if ZeroValues_SampleNo_count > 0:
    print(" " + Stat_Smpl_cnt_resp3)
else:
    print(" " + Stat_Smpl_cnt_resp4)
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Check RSD values
#Count how many characters have a) an RSD above the configured maximum, and b) an RSD of zero
HighValues_RSD_count = sum(1 for rsd in tpl_iCharacter_RSD if rsd > max_RSD_perCharacter)
ZeroValues_RSD_count = sum(1 for rsd in tpl_iCharacter_RSD if rsd == 0)
Stat_RSD_cnt_resp1 = "At least one character has an RSD value greater than the one specified (" + str(max_RSD_perCharacter) + ")"
Stat_RSD_cnt_resp2 = "All characters have acceptable RSD values"
Stat_RSD_cnt_resp3 = "At least one character has an RSD value of zero"
Stat_RSD_cnt_resp4 = "All characters have a value present in the RSD column"
Stat_RSD_cnt_resp5 = "ERROR" #no longer printed; the all-clear case now reports responses 2 and 4
#First line: RSD values that are too high
if HighValues_RSD_count > 0:
    print("STDEV values check: " + Stat_RSD_cnt_resp1)
else:
    print("STDEV values check: " + Stat_RSD_cnt_resp2)
#Second line: RSD values of zero
if ZeroValues_RSD_count > 0:
    print(" " + Stat_RSD_cnt_resp3)
else:
    print(" " + Stat_RSD_cnt_resp4)
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Find smallest character in array
if not tpl_iCharAvrgWidth:
    raise ValueError("No character widths were loaded, cannot determine the smallest character")
smallestWidth_inArray = min(tpl_iCharAvrgWidth)
#max_CharactersPossible is the smallest count of the narrowest character whose
#combined width reaches the redacted width (0 when that width is not positive)
if const_Width_RedactedText <= 0:
    max_CharactersPossible = 0
elif smallestWidth_inArray <= 0:
    #A non-positive width can never add up to the redacted width; counting up would never terminate
    raise ValueError("Smallest character width must be positive, got " + str(smallestWidth_inArray))
else:
    #Ceiling division using integers only
    max_CharactersPossible = -(-const_Width_RedactedText // smallestWidth_inArray)
#The analysis is capped at 30. The value is used as the (excluded) upper bound of a range() starting at 1
MaxValue_forAnalysis = min(max_CharactersPossible, 30)
print("")
print("Smallest character in array: " + str(smallestWidth_inArray))
print("Maximum number of characters in text: " + str(max_CharactersPossible))
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Print character table
#One row per character; every column except the last is left-justified to the width of its heading
print("")
print("Character Table")
print("===============================================================================")
print("Character" + " " + "Rel. Avrg. Width" + " " + "Samples (n)" + " " + "RSD (%)" + " " + "Sample list")
for _char, _width, _n_samples, _rsd, _samples in zip(
    tpl_iCharacter,
    tpl_iCharAvrgWidth,
    tpl_iCharacter_nSample,
    tpl_iCharacter_RSD,
    tpl_iCharacter_Samples,
):
    print(
        str(_char).ljust(9)
        + " " + str(_width).ljust(16)
        + " " + str(_n_samples).ljust(11)
        + " " + str(_rsd).ljust(7)
        + " " + str(_samples)
    )
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#00000000000000000000000000000000000000000 ANALYSIS START 0000000000000000000000000000000000
print("")
print("Analysis start")
print("===============================================================================")
Lst_iCharacter_Results = []
#Width contributed by each character. If a character is listed more than once its
#widths are summed, which is what adding every matching table entry would give.
_width_by_character = {}
for _char, _width in zip(tpl_iCharacter, tpl_iCharAvrgWidth):
    _width_by_character[str(_char)] = _width_by_character.get(str(_char), 0) + _width
#Try every combination of characters of length 1 up to (but excluding) MaxValue_forAnalysis
for iCombinationLength in range(1, MaxValue_forAnalysis):
    for aCombination in itertools.product(tpl_iCharacter, repeat=iCombinationLength):
        str_Combination = ''.join(aCombination)
        Lst_iCharacter_Results = list(aCombination)
        tmp_sum = sum(_width_by_character[str(_char)] for _char in aCombination)
        #Entry 52 of the width table is added twice to every candidate.
        #NOTE(review): presumably the space on either side of the redacted text - confirm against the config file
        _edge_width = tpl_iCharAvrgWidth[52]
        if _edge_width + _edge_width + tmp_sum != const_Width_RedactedText:
            print("> Rejecting " + str_Combination + " with a non-matching value of " + str(tmp_sum))
        elif " " not in str_Combination:
            #Correct width and no spaces: a single-word candidate
            Lst_PossibleAnswers.append(str_Combination)
            print("> Adding " + str_Combination + " to Lst_PossibleAnswers with a value of " + str(tmp_sum) + " [" + str(const_Width_RedactedText) + " - (2 * " + str(_edge_width) + ") = " + str(const_Width_RedactedText-(2*_edge_width)) + "]")
        else:
            #Correct width but contains spaces: set aside to be split into words later
            Lst_Rejections.append(str_Combination)
            #count() returns an int, so it has to be converted before concatenation
            print("> Adding " + str_Combination + " to Lst_Rejections with " + str(str_Combination.count(" ")) + " spaces")
print("===============================================================================")
print("")
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Split Lst_Rejections items by space, then run every part against the dictionary
#A set-aside string is accepted into Lst_PossibleAnswers2 only when each of its
#space-separated parts is a dictionary entry (exact, case-sensitive match).
print("")
print("Splitting Lst_Rejection items")
print("===============================================================================")
Lst_tempstring = []
Lst_Rejections_LengthOf = len(Lst_Rejections)
#Built once so every lookup is a constant-time membership test
_dictionary_words = set(str(word) for word in Lst_DictionarySearch)
if Lst_Rejections_LengthOf > 0:
    for _rejected in Lst_Rejections:
        _rejected = str(_rejected)
        if " " not in _rejected:
            continue
        #Leading, trailing or repeated spaces produce empty parts; those are ignored
        Lst_tempstring = [part for part in _rejected.split(" ") if part]
        if Lst_tempstring and all(part in _dictionary_words for part in Lst_tempstring):
            Lst_PossibleAnswers2.append(_rejected)
            print("> Adding " + _rejected + " to Lst_PossibleAnswers2")
        else:
            print("> Rejecting " + _rejected)
else:
    print("> No items!")
print("===============================================================================")
print("")
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Check items from Lst_PossibleAnswers against dictionary
Lst_PossibleAnswers_LengthOf = len(Lst_PossibleAnswers)
#A set gives constant-time lookups instead of scanning the whole dictionary for every candidate
_dictionary_words = set(str(word) for word in Lst_DictionarySearch)
if Lst_PossibleAnswers_LengthOf == 0:
    print("> No items!")
for _candidate in Lst_PossibleAnswers:
    _candidate = str(_candidate)
    #TODO: the comparison is case-sensitive; decide whether both sides should be lower-cased first
    if _candidate in _dictionary_words:
        Lst_PossibleAnswers2.append(_candidate)
        print("> Adding " + _candidate + " to Lst_PossibleAnswers2")
    else:
        print("> " + _candidate + " rejected.")
#End
#0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
#Final report: list every string that ended up in Lst_PossibleAnswers2
print("")
print("===============================================================================")
print("Items in array Lst_PossibleAnswers2:")
print("")
Lst_PossibleAnswers2_LengthOf = len(Lst_PossibleAnswers2)
if Lst_PossibleAnswers2_LengthOf != 0:
    #Entries are numbered from zero
    for position, answer in enumerate(Lst_PossibleAnswers2):
        print("> String " + str(position) + ": " + answer)
else:
    print("> No items!")