#!/usr/bin/env python3
import os
from os import walk
from os.path import join, split
import pprint
import traceback

import click
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
# Extraction function
def tflog2pandas(path: str) -> pd.DataFrame:
    """Convert a single TensorFlow event log file to a pandas DataFrame.

    Parameters
    ----------
    path : str
        Path to the TensorFlow event log file.

    Returns
    -------
    pd.DataFrame
        DataFrame with the columns "metric", "value", and "step".
    """
    # Per-tag limits on how many events the EventAccumulator keeps in
    # memory; a value of 0 means "load everything" (e.g. "scalars": 0
    # would keep every scalar event instead of only the most recent 100).
    DEFAULT_SIZE_GUIDANCE = {
        "compressedHistograms": 500,
        "images": 4,
        "audio": 4,
        "scalars": 100,
        "histograms": 1,
        "tensors": 100,
    }
    runlog_data = pd.DataFrame({"metric": [], "value": [], "step": []})
    try:
        event_acc = EventAccumulator(path, DEFAULT_SIZE_GUIDANCE)
        event_acc.Reload()
        tags = event_acc.Tags()["scalars"]
        # To read tensor summaries instead of scalars, use the "tensors"
        # tag list and decode each proto (requires `import tensorflow as tf`):
        #   tags = event_acc.Tags()["tensors"]
        #   event_list = event_acc.Tensors(tag)
        #   values = [tf.make_ndarray(x.tensor_proto).item() for x in event_list]
        for tag in tags:
            event_list = event_acc.Scalars(tag)
            values = [x.value for x in event_list]
            step = [x.step for x in event_list]
            r = pd.DataFrame({"metric": [tag] * len(step), "value": values, "step": step})
            runlog_data = pd.concat([runlog_data, r], ignore_index=True)
    # Dirty catch of DataLossError and other read failures
    except Exception:
        print("Event file possibly corrupt: {}".format(path))
        traceback.print_exc()
    return runlog_data
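
# A minimal usage sketch, assuming a local event file written by a
# SummaryWriter (the path below is hypothetical):
#
#   df = tflog2pandas("runs/exp1/events.out.tfevents.1234567890.hostname")
#   print(df.groupby("metric")["value"].describe())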


def many_logs2pandas(event_paths):
    """Concatenate the DataFrames of several event files into one."""
    all_logs = pd.DataFrame()
    for path in event_paths:
        log = tflog2pandas(path)
        if log is not None:
            if all_logs.shape[0] == 0:
                all_logs = log
            else:
                # DataFrame.append was removed in pandas 2.0; use pd.concat.
                all_logs = pd.concat([all_logs, log], ignore_index=True)
    return all_logs
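
# A minimal sketch of combining several runs; the paths are hypothetical:
#
#   paths = ["runs/exp1/events.out.tfevents.1.host",
#            "runs/exp2/events.out.tfevents.2.host"]
#   df = many_logs2pandas(paths)
#   print(df["metric"].unique())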


@click.command()
@click.argument("logdir-or-logfile")
@click.option(
    "--write-pkl/--no-write-pkl", help="save to pickle file or not", default=False
)
@click.option(
    "--write-csv/--no-write-csv", help="save to csv file or not", default=True
)
@click.option(
    "--separate-output/--no-separate-output",
    help="write one output file per event file instead of a combined one",
    default=True,
)
@click.option("--out-dir", "-o", help="output directory", default=".")
def main(
    logdir_or_logfile: str,
    write_pkl: bool,
    write_csv: bool,
    out_dir: str,
    separate_output: bool,
):
    """This is an enhanced version of
    https://gist.github.com/ptschandl/ef67bbaa93ec67aba2cab0a7af47700b

    This script extracts variables from all TensorFlow event files
    ("event*") under the given path, converts them to pandas DataFrames,
    and finally stores all (readable) runs of the logging directory as a
    csv-file and/or pickle-file.

    Example usage:

    # create a csv file from all tensorflow logs in the given directory (.)
    # and write it to the folder "./converted"
    tflogs2pandas.py . --write-csv --no-write-pkl -o converted

    # create a csv file from a single tensorflow logfile only
    # and write it to the folder "./converted"
    tflogs2pandas.py tflog.hostname.12345 --write-csv --no-write-pkl -o converted
    """
    pp = pprint.PrettyPrinter(indent=4)
    if os.path.isdir(logdir_or_logfile):
        # Collect all event* files from the logging dir and its subdirectories.
        event_paths = []
        for dir_path, dir_names, file_names in walk(logdir_or_logfile):
            event_paths.extend(
                [
                    join(dir_path, file_name)
                    for file_name in file_names
                    if file_name.startswith("event")
                ]
            )
    elif os.path.isfile(logdir_or_logfile):
        event_paths = [logdir_or_logfile]
    else:
        raise ValueError(
            "input argument {} has to be a file or a directory".format(
                logdir_or_logfile
            )
        )
    # Call & append
    if event_paths:
        if separate_output:
            for event_path in event_paths:
                pp.pprint("Found tensorflow log to process:")
                pp.pprint([event_path])
                all_logs = many_logs2pandas([event_path])
                pp.pprint("Head of created dataframe")
                pp.pprint(all_logs.head())
                # Mirror the input directory structure under out_dir.
                new_out_dir = split(event_path.replace(logdir_or_logfile, out_dir))[0]
                os.makedirs(new_out_dir, exist_ok=True)
                if write_csv:
                    print("saving to csv file")
                    out_file = os.path.join(new_out_dir, "events.csv")
                    print(out_file)
                    all_logs.to_csv(out_file, index=False)
                if write_pkl:
                    print("saving to pickle file")
                    out_file = os.path.join(new_out_dir, "events.pkl")
                    print(out_file)
                    all_logs.to_pickle(out_file)
        else:
            pp.pprint("Found tensorflow logs to process:")
            pp.pprint(event_paths)
            all_logs = many_logs2pandas(event_paths)
            pp.pprint("Head of created dataframe")
            pp.pprint(all_logs.head())
            os.makedirs(out_dir, exist_ok=True)
            if write_csv:
                print("saving to csv file")
                out_file = os.path.join(out_dir, "all_training_logs_in_one_file.csv")
                print(out_file)
                all_logs.to_csv(out_file, index=False)
            if write_pkl:
                print("saving to pickle file")
                out_file = os.path.join(out_dir, "all_training_logs_in_one_file.pkl")
                print(out_file)
                all_logs.to_pickle(out_file)
    else:
        print("No event paths have been found.")


if __name__ == "__main__":
    main()
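
# A quick sketch of loading the generated CSV back for analysis; the file
# name below assumes the combined (--no-separate-output) mode:
#
#   import pandas as pd
#   df = pd.read_csv("all_training_logs_in_one_file.csv")
#   wide = df.pivot_table(index="step", columns="metric", values="value")
#   print(wide.tail())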