output_reader.py
import sys
import os
import csv
import json
import pandas as pd

'''
Reads a .out file for run information and runtime.
Input:
    filename: type: string, name of file to read
    directory: type: string, directory of file
Output:
    dictionary with relevant info
'''
def read_out(filename, directory):
    with open(directory + '/' + filename, 'r') as runtime_file:
        run_info = runtime_file.readlines()
    runtime_dict = {}
    runtime_dict['Iteration'] = filename.split('.')[0].split('_')[-1].strip()
    # Internal Clusterer
    if run_info[0].startswith('PARLAY_NUM_THREADS'):
        threads = run_info[0].split(' ')[0].split('=')[-1]
        runtime_dict['Threads'] = threads.strip()
        run_info_arr = run_info[0].split(' --')
        for elem in run_info_arr:
            if elem.startswith('input_graph'):
                runtime_dict['Input Graph'] = elem.split('/')[-1].strip()
            elif elem.startswith('clusterer_name'):
                runtime_dict['Clusterer Name'] = elem.split('=')[-1].strip()
            elif elem.startswith('clusterer_config'):
                runtime_dict['Config'] = elem.split('{')[-1][:-2].strip()
        for elem in run_info[1:]:
            if elem.startswith('Num vertices'):
                runtime_dict['Num Vertices'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Num clusters'):
                runtime_dict['Num clusters'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Cluster Time'):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Read Time'):
                runtime_dict['Read Time'] = elem.split(' ')[-1].strip()
    # Neo4j Clusterer
    elif run_info[0].startswith('GDS version:'):
        for elem in run_info[1:]:
            if elem.startswith('Graph:'):
                runtime_dict['Input Graph'] = elem.split(',')[0][8:].strip()
                runtime_dict['Clusterer Name'] = 'Neo4j' + elem.split(',')[1][8:].strip()
            elif elem.startswith("{'concurrency'"):
                runtime_dict['Threads'] = elem.split(',')[0].split(' ')[-1].strip()
                runtime_dict['Config'] = elem.split(',', 1)[1][:-2].strip()
            elif elem.startswith("Time:"):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
    # NetworKit Clusterer
    elif run_info[0].startswith('NetworKit:'):
        for elem in run_info[1:]:
            if elem.startswith('Clusterer:'):
                runtime_dict['Clusterer Name'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Threads:'):
                runtime_dict['Threads'] = elem.split(' ')[-1].strip()
            elif elem.startswith("Cluster Time:"):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Graph:'):
                runtime_dict['Input Graph'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Config:'):
                runtime_dict['Config'] = elem.split(' ', 1)[-1].strip()
    # Snap Clusterer
    elif run_info[0].startswith('Snap:'):
        runtime_dict['Threads'] = 1
        runtime_dict['Config'] = ''
        for elem in run_info[1:]:
            if elem.startswith('Wealy Connected Component Time:') or elem.startswith('KCore Time:') or elem.startswith('Cluster Time:'):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Input graph:'):
                runtime_dict['Input Graph'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Output file'):
                runtime_dict['Clusterer Name'] = elem.split('/')[-1].split('_')[0].strip()
    # Tectonic Clusterer
    elif run_info[0].startswith('Tectonic:'):
        runtime_dict['Threads'] = 1
        runtime_dict['Config'] = run_info[2].strip()
        runtime_dict['Clusterer Name'] = 'Tectonic'
        for elem in run_info[1:]:
            if elem.startswith('Cluster Time:'):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Input graph:'):
                runtime_dict['Input Graph'] = elem.split(' ')[-1].strip()
    # Tigergraph Clusterer
    elif run_info[0].startswith('Tigergraph:'):
        runtime_dict['Clusterer Name'] = 'Tigergraph'
        for elem in run_info[1:]:
            if elem.startswith('Total Time:'):
                runtime_dict['Cluster Time'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Config:'):
                runtime_dict['Config'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Threads:'):
                runtime_dict['Threads'] = elem.split(' ')[-1].strip()
            elif elem.startswith('Input graph:'):
                runtime_dict['Input Graph'] = elem.split(' ')[-1].strip()
    return runtime_dict

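# Illustrative only: a hypothetical first line for the "Internal Clusterer" case,
# inferred from the parsing above rather than from real benchmark output:
#   PARLAY_NUM_THREADS=60 ./clusterer --input_graph=/data/com-orkut.graph --clusterer_name=ParallelAffinityClusterer --clusterer_config={...}
# From a header like this, read_out would record Threads='60',
# Input Graph='com-orkut.graph', and Clusterer Name='ParallelAffinityClusterer',
# then pick up 'Num vertices', 'Num clusters', 'Cluster Time', and 'Read Time'
# from the remaining lines.
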
'''
Reads a .stats file for calculated stats.
Input:
    filename: type: string, name of file to read
    directory: type: string, directory of file
Output:
    dictionary with relevant info
'''
def read_stats(filename, directory):
    with open(directory + '/' + filename, 'r') as stats_file:
        stats_dict = json.loads(stats_file.readline())
    flattened_dict = {}
    for elem in stats_dict:
        if type(stats_dict[elem]) != dict:
            flattened_dict[elem] = stats_dict[elem]
        else:
            for elem2 in stats_dict[elem]:
                flattened_dict[elem + '_' + elem2] = stats_dict[elem][elem2]
    return flattened_dict

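# Illustrative only (field names and numbers are made up): a .stats line such as
#   {"modularity": 0.42, "communityCount": {"min": 3, "max": 118}}
# would be flattened by read_stats into
#   {"modularity": 0.42, "communityCount_min": 3, "communityCount_max": 118}
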
'''
Reads through the given directory for run information and stats.
Input:
    directory: type: string, directory of files
Output:
    creates csv files, 'runtimes.csv' and 'stats.csv', in the given directory
'''
def read_files(directory):
    encode_directory = os.fsencode(directory)
    runtime_dataframe = pd.DataFrame()
    stats_dataframe = pd.DataFrame()
    for file in os.listdir(encode_directory):
        filename = os.fsdecode(file)
        # Read .out file for runtime
        if filename.endswith(".out"):
            runtime_dataframe = pd.concat([runtime_dataframe, pd.DataFrame([read_out(filename, directory)])], ignore_index=True)
        # Read .stats file
        elif filename.endswith(".stats"):
            stats_dict = read_stats(filename, directory)
            # Take run info from the matching .out file
            try:
                runtime_dict = read_out(filename.split('.')[0] + '.out', directory)
                for col in ['Clusterer Name', 'Threads', 'Input Graph', 'Config', 'Cluster Time']:
                    stats_dict[col] = runtime_dict[col]
            except (OSError, KeyError, IndexError):
                # Skip run info if the matching .out file is missing or incomplete
                pass
            stats_dataframe = pd.concat([stats_dataframe, pd.DataFrame([stats_dict])], ignore_index=True)
    stats_dataframe.to_csv(directory + '/stats.csv')
    runtime_dataframe.to_csv(directory + '/runtimes.csv')

def main():
    args = sys.argv[1:]
    read_files(args[0])


if __name__ == "__main__":
    main()
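
# Example invocation (the directory name below is hypothetical):
#   python output_reader.py results
# This scans 'results' for *.out and *.stats files and writes
# 'results/runtimes.csv' and 'results/stats.csv'.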