UnitTest.py

import unittest
from TICC_solver import TICC
import numpy as np
import sys


class TestStringMethods(unittest.TestCase):

    def test_example(self):
        fname = "example_data.txt"
        ticc = TICC(window_size = 1,number_of_clusters = 8, lambda_parameter = 11e-2, beta = 600, maxIters = 100,
                    threshold = 2e-5, write_out_file = False, prefix_string = "output_folder/", num_proc=1)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)
        assign = np.loadtxt("UnitTest_Data/Results.txt")
        val = abs(assign - cluster_assignment)
        self.assertEqual(sum(val), 0)

        # Test prediction works with batch of data outside of `fit` method. Perhaps there is a better way
        # to test this in parallel so these are more like unit tests rather than integration tests?
        test_batch = ticc.predict_clusters(ticc.trained_model['complete_D_train'][0:1000, ])
        batch_val = abs(test_batch - cluster_assignment[0:1000])
        self.assertEqual(sum(batch_val), 0)

        # Test streaming by passing in 5 row blocks at a time (current timestamp and previous 4)
        # I am causing data leakage by training on the whole set and then using the trained model while streaming,
        # but this is for testing the code, so it is ok
        # TODO: figure out why larger blocks don't improve predictions more. Reference:
        # https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
        def test_streaming(block_size):
            test_stream = np.zeros(1000)
            test_stream[0:block_size] = cluster_assignment[0:block_size]
            for i in range(block_size, 1000):
                point = ticc.trained_model['complete_D_train'][i - block_size:i, ]
                test_stream[i] = ticc.predict_clusters(point)[block_size - 1]

            percent_correct_streaming = 100 * sum(cluster_assignment[0:1000] == test_stream) / 1000.0
            self.assertGreater(percent_correct_streaming, 0.9)

        test_streaming(5)

        for i in range(8):
            mrf = np.loadtxt("UnitTest_Data/cluster_"+str(i)+".txt",delimiter=',')
            try:
                np.testing.assert_array_almost_equal(mrf, cluster_MRFs[i], decimal=3)
            except AssertionError:
                #Test failed
                self.assertTrue(1==0)


    def test_multiExample(self):
        fname = "example_data.txt"
        ticc = TICC(window_size = 5,number_of_clusters = 5, lambda_parameter = 11e-2, beta = 600, maxIters = 100,
                    threshold = 2e-5, write_out_file = False, prefix_string = "output_folder/", num_proc=1)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)
        assign = np.loadtxt("UnitTest_Data/multiResults.txt")
        val = abs(assign - cluster_assignment)
        self.assertEqual(sum(val), 0)

        for i in range(5):
            mrf = np.loadtxt("UnitTest_Data/multiCluster_"+str(i)+".txt",delimiter=',')
            try:
                np.testing.assert_array_almost_equal(mrf, cluster_MRFs[i], decimal=3)
            except AssertionError:
                #Test failed
                self.assertTrue(1==0)


if __name__ == '__main__':
    unittest.main()