# UnitTest.py -- regression tests for the TICC solver.
# Origin: fork of davidhallac/TICC.
import unittest
from TICC_solver import TICC
import numpy as np
import sys
class TestStringMethods(unittest.TestCase):
    """Regression tests for the TICC solver.

    Each test fits a TICC model on ``example_data.txt`` and compares the
    returned cluster assignment and per-cluster MRF matrices with reference
    results stored under ``UnitTest_Data/``.
    """

    # Number of leading rows used by the batch and streaming prediction checks.
    _PREDICTION_ROWS = 1000

    # Lower bound passed to ``assertGreater`` in ``_check_streaming``.
    # NOTE(review): the accuracy it is compared against is computed on a
    # 0-100 scale, so 0.9 demands only 0.9 % correct, not 90 %. The value is
    # kept as-is because raising it could make the test fail; confirm the
    # intended threshold.
    _MIN_STREAMING_PERCENT = 0.9

    @staticmethod
    def _build_solver(window_size, number_of_clusters):
        """Return a TICC solver using the hyper-parameters shared by all tests.

        Only ``window_size`` and ``number_of_clusters`` differ between tests.
        """
        return TICC(window_size=window_size, number_of_clusters=number_of_clusters,
                    lambda_parameter=11e-2, beta=600, maxIters=100, threshold=2e-5,
                    write_out_file=False, prefix_string="output_folder/", num_proc=1)

    def _assert_mrfs_match(self, cluster_MRFs, file_prefix, number_of_clusters):
        """Compare every cluster MRF with its reference file to 3 decimals.

        The reference for cluster ``i`` is read from ``<file_prefix><i>.txt``
        (comma-delimited). The numpy assertion is allowed to propagate:
        unittest reports ``AssertionError`` as a test failure, and numpy's
        message shows which entries differ.
        """
        for i in range(number_of_clusters):
            path = "%s%d.txt" % (file_prefix, i)
            expected = np.loadtxt(path, delimiter=',')
            np.testing.assert_array_almost_equal(
                cluster_MRFs[i], expected, decimal=3,
                err_msg="MRF for cluster %d differs from %s" % (i, path))

    def _check_streaming(self, ticc, cluster_assignment, block_size):
        """Predict row by row from sliding blocks and check the hit rate.

        Test streaming by passing in ``block_size`` row blocks at a time.
        This causes data leakage (the model was trained on the whole set and
        is then used while streaming), but it is only meant to exercise the
        code, so that is acceptable.

        TODO: figure out why larger blocks don't improve predictions more.
        Reference:
        https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
        """
        n_rows = self._PREDICTION_ROWS
        test_stream = np.zeros(n_rows)
        # The first ``block_size`` entries have no full block before them, so
        # they are seeded with the fitted assignment.
        test_stream[0:block_size] = cluster_assignment[0:block_size]
        for i in range(block_size, n_rows):
            # NOTE(review): this block covers rows i-block_size .. i-1, yet
            # its last prediction is stored at index i -- confirm whether the
            # window was meant to end at row i.
            point = ticc.trained_model['complete_D_train'][i - block_size:i, ]
            test_stream[i] = ticc.predict_clusters(point)[block_size - 1]
        matches = cluster_assignment[0:n_rows] == test_stream
        percent_correct_streaming = 100.0 * np.sum(matches) / n_rows
        self.assertGreater(percent_correct_streaming, self._MIN_STREAMING_PERCENT)

    def test_example(self):
        """Fit with window_size=1 and 8 clusters; check fit and prediction."""
        fname = "example_data.txt"
        ticc = self._build_solver(window_size=1, number_of_clusters=8)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)

        expected_assignment = np.loadtxt("UnitTest_Data/Results.txt")
        np.testing.assert_array_equal(cluster_assignment, expected_assignment)

        # Test prediction works with batch of data outside of `fit` method.
        # Perhaps there is a better way to test this in parallel so these are
        # more like unit tests rather than integration tests?
        n_rows = self._PREDICTION_ROWS
        test_batch = ticc.predict_clusters(
            ticc.trained_model['complete_D_train'][0:n_rows, ])
        np.testing.assert_array_equal(test_batch, cluster_assignment[0:n_rows])

        self._check_streaming(ticc, cluster_assignment, 5)

        self._assert_mrfs_match(cluster_MRFs, "UnitTest_Data/cluster_", 8)

    def test_multiExample(self):
        """Fit with window_size=5 and 5 clusters; check against references."""
        fname = "example_data.txt"
        ticc = self._build_solver(window_size=5, number_of_clusters=5)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)

        expected_assignment = np.loadtxt("UnitTest_Data/multiResults.txt")
        np.testing.assert_array_equal(cluster_assignment, expected_assignment)

        self._assert_mrfs_match(cluster_MRFs, "UnitTest_Data/multiCluster_", 5)
if __name__ == '__main__':
    # Executed as a script: discover and run every TestCase in this module.
    unittest.main()