-
Notifications
You must be signed in to change notification settings - Fork 1
/
Existing CSVs -> MutableSparseIntSets.java
96 lines (86 loc) · 2.58 KB
/
Existing CSVs -> MutableSparseIntSets.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package bitsetTest;
import java.util.Hashtable;
import java.util.Map;
import com.opencsv.CSVReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import com.ibm.wala.util.intset.MutableSparseIntSet;
import com.ibm.wala.util.intset.MutableSparseIntSetFactory;
public class TestBitset {
public static void main (String[] arr)
{
ArrayList<String> toCompare = new ArrayList<String>();
//fill toCompare with tables to be turned into MutableSparseIntSets and compared /
vectorize(to compare);
}
private static void vectorize(ArrayList<String> table) {
final long startTime = System.nanoTime();
Map<String,Integer> universalColumns = new Hashtable<String,Integer>();
for (int i = 0; i < table.size(); i++)
{
String fileName = table.get(i);
CSVReader reader = null;
try {
reader = new CSVReader(new FileReader(fileName));
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
String[] header = null;
try {
header = reader.readNext();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
for (int j = 0; j < header.length; j++)
{
if (! universalColumns.containsKey(header[j]) )
{
universalColumns.put(header[j],universalColumns.size());
}
}
try {
reader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
ArrayList<MutableSparseIntSet> allIntSets = new ArrayList<MutableSparseIntSet>();
for (int i = 0; i < table.size(); i++)
{
MutableSparseIntSetFactory factory = new MutableSparseIntSetFactory();
MutableSparseIntSet bitset = factory.make();
String fileName = table.get(i);
CSVReader reader = null;
try {
reader = new CSVReader(new FileReader(fileName));
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
String[] header = null;
try {
header = reader.readNext();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
for (int j = 0; j < header.length; j++)
{
bitset.add(universalColumns.get(header[j]));
//System.out.println(universalColumns.get(header[j]));
}
allIntSets.add(bitset);
//System.out.println(bitset);
}
final long endTime = System.nanoTime();
double seconds = (double)(endTime-startTime) / 1000000000.0;
System.out.println(allIntSets.size() + ", " + universalColumns.size());
System.out.println(seconds);
System.out.println();
}
}