forked from andikleen/pmu-tools
-
Notifications
You must be signed in to change notification settings - Fork 1
/
interval-normalize.py
executable file
·80 lines (72 loc) · 2.32 KB
/
interval-normalize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python
# convert perf stat -Ixxx -x, / toplev -Ixxx -x, output to normalized output
# this version buffers all data in memory, so it can use a lot of memory.
# t1,ev1,num1
# t1,ev2,num2
# t2,ev1,num3
# ->
# timestamp,ev1,ev2
# t1,num1,num2
# t2,num3,
# When the input has a CPU column, separate lines are generated for each CPU (may need post-filtering).
import sys
import csv
import re
import copy
import argparse
import csv_formats
# Command line interface: an optional input file (stdin by default), an
# optional output file (stdout by default), an optional CPU filter and the
# placeholder printed for missing values.
ap = argparse.ArgumentParser(
    description='Normalize CSV data from perf or toplev. All values are printed on a single line.')
ap.add_argument('inputfile', type=argparse.FileType('r'), default=sys.stdin, nargs='?')
# nargs='?' allows "-o" to be given without a value.  Without a const that
# case produces None, which csv.writer cannot write to, so fall back to
# stdout explicitly.
ap.add_argument('--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
                const=sys.stdout, nargs='?')
ap.add_argument('--cpu', nargs='?', help='Only output for this cpu')
ap.add_argument('--na', nargs='?', help='Value to use if data is not available', default="")
args = ap.parse_args()
# Buffer the whole input in memory, grouped into one value row per
# (timestamp, cpu) pair, so the header can be written once all events are known.
printed_header = False  # not referenced elsewhere in the visible script; kept as-is
timestamp = None        # timestamp of the group currently being collected
lastcpu = None          # cpu of the group currently being collected
cpu = None              # pre-bound so code after the loop works on empty input

events = dict()         # event name -> column index shared by all buffered rows
out = []                # value rows, one list per finished group
times = []              # timestamp of each row in out (parallel list)
cpus = []               # cpu of each row in out (parallel list)
res = []                # values of the group currently being collected

rc = csv.reader(args.inputfile)
writer = csv.writer(args.output)

for row in rc:
    r = csv_formats.parse_csv_row(row)
    if r is None:
        continue
    ts, cpu, ev, val = r.ts, r.cpu, r.ev, r.val

    # Apply the --cpu filter to every row.  Checking it only at group
    # boundaries lets rows of unwanted CPUs from the first group through.
    if args.cpu and cpu != args.cpu:
        continue

    if ts != timestamp or cpu != lastcpu:
        # A new group starts: store the finished one under its OWN timestamp
        # and cpu (not those of the row that just arrived).  Testing res
        # instead of the timestamp's truthiness keeps a falsy timestamp
        # from suppressing the flush.
        # delay output in case we didn't see all headers
        # only need to do that for toplev, directly output for perf?
        # could limit buffering to save memory?
        if res:
            out.append(res)
            times.append(timestamp)
            cpus.append(lastcpu)
            res = []
        timestamp = ts
        lastcpu = cpu

    # use a list for row storage to keep memory requirements down
    if ev not in events:
        # Number columns by how many events are known, not by the current
        # row length; otherwise an event first seen in a later, shorter row
        # would reuse the column of an earlier event.
        events[ev] = len(events)
    ind = events[ev]
    if ind >= len(res):
        # Pad with None so the row is long enough for this column.
        res += [None] * ((ind + 1) - len(res))
    res[ind] = val

# The last group has no following row to trigger a flush; store it here.
if res:
    out.append(res)
    times.append(timestamp)
    cpus.append(lastcpu)
def resolve(row, ind):
    """Return the value in column ind of row, or args.na when it is missing.

    A value counts as missing when the row is too short to contain the
    column or when the stored entry is None.  args is the module-level
    result of argument parsing.
    """
    value = row[ind] if ind < len(row) else None
    return args.na if value is None else value
# Write the normalized table: one header line, then one line per buffered row
# with the events in sorted order.
keys = sorted(events.keys())
# Decide on the CPU column from the buffered rows rather than from the loop
# variable cpu, which is unbound when no input row was parsed.
has_cpu = any(c is not None for c in cpus)
header = ["Timestamp"]
if has_cpu:
    header.append("CPU")
writer.writerow(header + keys)
for row, ts, cpunum in zip(out, times, cpus):
    line = [ts]
    if has_cpu:
        line.append(cpunum)
    # resolve() substitutes the --na placeholder for events missing in this row.
    line.extend(resolve(row, events[x]) for x in keys)
    writer.writerow(line)