-
Notifications
You must be signed in to change notification settings - Fork 4
/
rdtsc.h
319 lines (256 loc) · 10.8 KB
/
rdtsc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
#ifndef __RDTSC_H__
#define __RDTSC_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/time.h>
//#include "config.h"
//CHECK_ERROR(err): abort the program if an OpenCL status code signals failure.
//  err - expression yielding a cl_int status; it is evaluated exactly once,
//        so passing a call such as CHECK_ERROR(clFinish(q)) is safe.
//When the status differs from CL_SUCCESS, the textual form returned by
//get_error_string() and the numeric code are written to stderr and the
//process exits with status 1.
//The expansion is a bare brace block (not do/while(0)) because other macros in
//this header invoke CHECK_ERROR(err) without a trailing semicolon.
#define CHECK_ERROR(err) { \
    cl_int ocd_check_err_ = (err); \
    if (ocd_check_err_ != CL_SUCCESS) { \
        fprintf(stderr, "Error: %s %d\n", get_error_string(ocd_check_err_), ocd_check_err_); \
        exit(1); \
    } \
}
#include <CL/cl.h>
//Compile-time defaults related to OpenCL device selection.
//NOTE(review): nothing in this header reads these three values; presumably they
//are consumed by the code that calls GetDevice() -- confirm against callers.
//presumably non-zero selects a GPU device -- TODO confirm
#define USEGPU 1
//presumably the default OpenCL platform index -- TODO confirm
#define PLATFORM_ID 0
//presumably the default device index within that platform -- TODO confirm
#define DEVICE_ID 0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef _STRING_H
#include <string.h>
#endif
//#include "config.h"
//Maps an OpenCL status code to a printable string; CHECK_ERROR uses the result
//as the %s argument of its "Error: %s %d" message.
const char *get_error_string(cl_int err);
//Shared cl_event object defined in another translation unit.
//NOTE(review): presumably a scratch event handed to enqueue calls and then to
//the START_TIMER family -- confirm against callers.
extern cl_event ocdTempEvent;
#ifdef ENABLE_TIMER
//Timer categories.
//Single-event and host categories each occupy their own bit; the composed
//(dual) category is negative so that the most significant bit can potentially
//serve as a quick identifier for composed timers.
enum timer_types {
    OCD_TIMER_D2H    = 1 << 0,
    OCD_TIMER_H2D    = 1 << 1,
    OCD_TIMER_D2D    = 1 << 2,
    OCD_TIMER_KERNEL = 1 << 3,
    OCD_TIMER_HOST   = 1 << 4,
    OCD_TIMER_DUAL   = -1
};
//Timer tied to a single OpenCL event.
//The leading members (type, name, nlen, starttime, endtime) appear in the same
//order in ocdDualTimer and ocdHostTimer; keep them in sync, because all three
//structs are accessed through union ocdInternalTimer.
struct ocdTimer {
    enum timer_types type;
    const char *name;     //optional timer name, can be searched for grouping
    int nlen;             //presumably the length of name -- TODO confirm
    cl_ulong starttime;   //written by START_TIMER (CL_PROFILING_COMMAND_START)
    cl_ulong endtime;     //written by END_TIMER (CL_PROFILING_COMMAND_END)
    cl_event event;       //the event whose profiling info is queried
};
//Pointer to a single-event timer, defined in another translation unit.
//NOTE(review): presumably passed as the `p` argument of START_TIMER -- confirm.
extern struct ocdTimer *ocdTempTimer;

//Composed timer: the same layout as struct ocdTimer except that it carries
//two events instead of one.
struct ocdDualTimer {
    enum timer_types type;
    const char *name;
    int nlen;             //presumably the length of name -- TODO confirm
    cl_ulong starttime;   //START_DUAL_TIMER: profiling start of the first event
    cl_ulong endtime;     //END_DUAL_TIMER: profiling end of event[1]
    cl_event event[2];    //[0] is the "first" event, [1] the second
};

//Pointer to a composed timer, defined in another translation unit.
//NOTE(review): presumably passed as the `p` argument of START_DUAL_TIMER -- confirm.
extern struct ocdDualTimer *ocdTempDualTimer;
//Host timer: uses no OpenCL event. Start and end are taken from two
//gettimeofday calls, which return their values immediately. The microsecond
//resolution is scaled up by 1000 so the values are compatible with the
//CL-based timers.
struct ocdHostTimer {
    enum timer_types type;
    const char *name;
    int nlen;              //presumably the length of name -- TODO confirm
    cl_ulong starttime;
    cl_ulong endtime;
    struct timeval timer;  //scratch storage for the most recent gettimeofday result
};

//Special-purpose timer that does not reside on any list; it measures host
//time from OCD_INIT to OCD_FINISH.
extern struct ocdHostTimer fullExecTimer;

//Pointer to a host timer, defined in another translation unit.
//NOTE(review): presumably passed as the `p` argument of START_HOST_TIMER -- confirm.
extern struct ocdHostTimer *ocdTempHostTimer;
//Tunables for the timer self-test; only defined when TIMER_TEST is set.
#ifdef TIMER_TEST
//number of fake names to generate
#define TIMER_TEST_NAME_COUNT 200
//maximum number of fake events per name
#define TIMER_TEST_MAX_LIFE 20
//NOTE(review): undocumented in the original; presumably the maximum length of
//a generated fake name -- confirm against the test code
#define TIMER_TEST_MAX_LENG 30
#endif
//Generic timer handle: one allocation viewed as any of the three timer kinds.
//The list functions (addTimer, removeTimer) and the START_* macros all traffic
//in pointers to this union.
union ocdInternalTimer {
    struct ocdTimer     s;  //single-event view
    struct ocdDualTimer c;  //composed (two-event) view
    struct ocdHostTimer h;  //host (gettimeofday) view
};
//Node of the singly linked timer list.
struct timer_group_mem {
    union ocdInternalTimer *timer;      //timer referenced by this node
    struct timer_group_mem *next;       //following node in the list
    struct timer_group_mem *alphanext;  //ignored except for alpha sort
};
//Timer list anchors, defined in another translation unit.
extern struct timer_group_mem head; //sentinel
//NOTE(review): presumably the last node of the list starting at head (addTimer
//is documented as appending to the end) -- confirm in the implementation
extern struct timer_group_mem * tail;
//NOTE(review): presumably the (empty) name string belonging to the root name
//node -- confirm in the implementation
extern char rootStr[1];
//Seven-slot time aggregators (timer_name_tree_node.times points at an array of
//this shape). The TOTAL_* macros index totalTimes as: [0] execution, [1] D2H,
//[2] H2D, [3] D2D, [4] kernel, [5] host, [6] dual.
extern cl_ulong rootTimes[7];
extern cl_ulong totalTimes[7];
//Node of the timer-name tree / simple name list used to aggregate timers that
//share a name.
struct timer_name_tree_node {
    //The first character is hijacked as a flag for pointer ownership, to make
    //sense of who is responsible for freeing at the end; this lets one
    //descendant branch reuse this node's space.
    const char *string;
    int len;                              //length, not counting flag and zero-byte
    struct timer_name_tree_node *next;
    struct timer_name_tree_node *child;
    struct timer_group_mem *n_head;       //first list node for a timer matching this name
    int tcount;                           //presumably the number of timers with this name -- TODO confirm
    //Pointer to a 7-member array of cl_ulongs: one aggregator for each type,
    //and another for all.
    cl_ulong *times;
};
//Root node of the name tree / first node of the simple name list.
extern struct timer_name_tree_node root;
//Linear search of the name list for the name s of length len.
//Returns a pointer to the matching time array, or -1 (as a pointer value) if
//no entry exists yet.
//Rather inefficient if many names are used, but the tree will take care of
//speeding up lookups, and alpha sort will become the default as a side effect.
extern void * checkSimpleNameList(const char * s, int len);
//Last node of the simple name list (destNameList walks from root to atail).
extern struct timer_name_tree_node * atail;
//Simple named-timer aggregation.
//Performs a linear scan of the timer list and adds nodes to a names list as
//necessary.
//DO NOT USE AT THE SAME TIME AS THE TREE: this replaces the tree with a simple
//unordered list.
//All four functions take no arguments; they are declared with (void) so the
//compiler checks calls against a real prototype (in C an empty parameter list
//leaves the arguments unchecked).
extern void simpleNameTally(void);
//Assumes simpleNameTally was already called (once) to add up the timers;
//culls off zero-value timers.
extern void simpleNamePrint(void);
//Chews up the timer list from head to tail, deallocating all nodes.
extern void destTimerList(void);
//Chews up the simpleNameList from root to atail, deallocating all nodes.
extern void destNameList(void);
//Looks up the timer associated with event e.
//Only returns the primary timer, not any composed timers.
//START_TIMER treats a return value of (void *) -1 as "no timer exists yet".
extern void * getTimePtr(cl_event e);
//Looks up a composed timer.
//Only returns a composed timer with events matching both e1 and e2, in either order.
//START_DUAL_TIMER treats a return value of (void *) -1 as "no timer exists yet".
extern void * getDualTimePtr(cl_event e1, cl_event e2);
//simply adds timer t to the end of the list
extern void addTimer(union ocdInternalTimer * t);
//Removes timer t from the list.
//irreversible! Only call immediately before freeing the timer!
//NOTE(review): the meaning of the int return value is not visible in this
//header -- confirm in the implementation.
extern int removeTimer(union ocdInternalTimer * t);
#ifdef TIMER_TEST
//Debug call for checking list construction; only declared when TIMER_TEST is
//defined. Declared with (void) so calls are checked against a real prototype.
extern void walkList(void);
#endif
//START_TIMER(e, t, n, p): record the profiling start time of OpenCL event e.
//  e - cl_event to time (evaluated once)
//  t - timer type; values >= OCD_TIMER_HOST or <= OCD_TIMER_DUAL are rejected
//  n - timer name; the pointer is stored, the string is not copied
//  p - lvalue that receives the struct ocdTimer * used for this event
//If getTimePtr(e) already knows a timer for e, that timer is reused and its
//starttime is overwritten. Otherwise a zeroed timer is allocated, filled in,
//appended with addTimer and then started.
//On an invalid type a message is printed to stderr, nothing is allocated and
//p is left untouched. Allocation failure and OpenCL errors terminate the
//process (exit(1), the latter via CHECK_ERROR).
//Should work for composed timers, so long as START_TIMER is strictly used on
//the first of the two events.
#define START_TIMER(e, t, n, p) { \
    cl_event ocd_st_event_ = (e); \
    void *ocd_st_found_ = getTimePtr(ocd_st_event_); \
    if (ocd_st_found_ == (void *) -1) { \
        if ((t) >= OCD_TIMER_HOST || (t) <= OCD_TIMER_DUAL) { \
            fprintf(stderr, "Timer Error: invalid type [%d] for START_TIMER!\nTimer for event [%lx] not initialized or started!\n", (t), (unsigned long) ocd_st_event_); \
        } else { \
            struct ocdTimer *ocd_st_new_ = (struct ocdTimer *) calloc(1, sizeof(struct ocdTimer)); \
            if (ocd_st_new_ == NULL) { \
                fprintf(stderr, "Timer Error: out of memory in START_TIMER!\n"); \
                exit(1); \
            } \
            ocd_st_new_->type = (t); \
            ocd_st_new_->event = ocd_st_event_; \
            ocd_st_new_->name = (n); \
            addTimer((union ocdInternalTimer *) ocd_st_new_); \
            (p) = ocd_st_new_; /*set the return pointer*/ \
            cl_int ocd_st_err_ = clGetEventProfilingInfo(ocd_st_event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ocd_st_new_->starttime, NULL); \
            CHECK_ERROR(ocd_st_err_) \
        } \
    } else { \
        struct ocdTimer *ocd_st_old_ = &((union ocdInternalTimer *) ocd_st_found_)->s; \
        (p) = ocd_st_old_; /*set the return pointer*/ \
        cl_int ocd_st_err_ = clGetEventProfilingInfo(ocd_st_event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ocd_st_old_->starttime, NULL); \
        CHECK_ERROR(ocd_st_err_) \
    } \
}
//START_HOST_TIMER(n, p): start a gettimeofday-based host timer.
//  n - timer name; the pointer is stored, the string is not copied
//  p - lvalue that receives the newly allocated struct ocdHostTimer *
//A zeroed ocdHostTimer is allocated, tagged OCD_TIMER_HOST and its starttime is
//set to the current wall-clock time in nanoseconds (microseconds scaled by
//1000 to match the CL-based timers). The arithmetic is done in cl_ulong so it
//cannot overflow where long is 32 bits wide.
//Allocation failure terminates the process with exit(1).
//NOTE(review): unlike START_TIMER, the new timer is NOT appended with
//addTimer() here (same as the original) -- confirm whether host timers are
//meant to be on the timer list.
#define START_HOST_TIMER(n, p) { \
    struct ocdHostTimer *ocd_ht_new_ = (struct ocdHostTimer *) calloc(1, sizeof(struct ocdHostTimer)); \
    if (ocd_ht_new_ == NULL) { \
        fprintf(stderr, "Timer Error: out of memory in START_HOST_TIMER!\n"); \
        exit(1); \
    } \
    ocd_ht_new_->type = OCD_TIMER_HOST; \
    ocd_ht_new_->name = (n); \
    gettimeofday(&ocd_ht_new_->timer, NULL); \
    ocd_ht_new_->starttime = (cl_ulong) ocd_ht_new_->timer.tv_sec * 1000000000ULL \
                           + (cl_ulong) ocd_ht_new_->timer.tv_usec * 1000ULL; \
    (p) = ocd_ht_new_; \
}
//END_TIMER(t): store the profiling end time of t's event in t->endtime.
//  t - pointer expression to a struct ocdTimer; it is parenthesized in the
//      expansion, so forms such as END_TIMER(&timer) expand correctly.
//Should work for composed timers, so long as END_TIMER is strictly used on the
//last event. Assumes t is a valid single-event timer; it does not check
//whether t is a dual timer.
//An OpenCL error terminates the process via CHECK_ERROR.
#define END_TIMER(t) { \
    cl_int ocd_et_err_ = clGetEventProfilingInfo((t)->event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), (void *) &(t)->endtime, NULL); \
    CHECK_ERROR(ocd_et_err_) \
}
//END_HOST_TIMER(t): stop a gettimeofday-based host timer.
//  t - pointer expression to a timer; assumed valid. It is parenthesized in
//      the expansion, so END_HOST_TIMER(&timer) works.
//Only acts when t->type is OCD_TIMER_HOST; any other type is left untouched.
//Stores the current wall-clock time in nanoseconds (microseconds scaled by
//1000) in t->endtime, computed in cl_ulong so it cannot overflow where long is
//32 bits wide.
#define END_HOST_TIMER(t) { \
    if ((t)->type == OCD_TIMER_HOST) { \
        gettimeofday(&(t)->timer, NULL); \
        (t)->endtime = (cl_ulong) (t)->timer.tv_sec * 1000000000ULL \
                     + (cl_ulong) (t)->timer.tv_usec * 1000ULL; \
    } \
}
//Named slots of the totalTimes[7] aggregate array.
//OCD_PRINT_TIMERS reports TOTAL_HOST as "User Timer Total" and TOTAL_DUAL as
//"Composed Timer Total".
#define TOTAL_EXEC totalTimes[0]
#define TOTAL_D2H totalTimes[1]
#define TOTAL_H2D totalTimes[2]
#define TOTAL_D2D totalTimes[3]
#define TOTAL_KERNEL totalTimes[4]
#define TOTAL_HOST totalTimes[5]
#define TOTAL_DUAL totalTimes[6]
//OCD_PRINT_TIMERS: print the seven aggregate totals (in nanoseconds) to stdout.
//Each cl_ulong total is cast to unsigned long long so that the argument type
//matches the %llu conversion on every platform (cl_ulong is a 64-bit unsigned
//type, but not necessarily spelled unsigned long long).
//The output text is unchanged from the original.
#define OCD_PRINT_TIMERS {printf("********************************************************************************\n"\
"OCD Core Timers (nanoseconds)\n"\
"********************************************************************************\n"\
"Total Execution Time: \t[%llu]\n"\
"\tHost to Device Time: [%llu]\n"\
"\tDevice to Host Time: [%llu]\n"\
"\tDevice to Device Time: [%llu]\n"\
"\tDevice Kernel Time: [%llu]\n"\
"\tUser Timer Total: [%llu]\n"\
"\tComposed Timer Total: [%llu]\n"\
"********************************************************************************\n"\
, (unsigned long long) TOTAL_EXEC, (unsigned long long) TOTAL_H2D, (unsigned long long) TOTAL_D2H, \
(unsigned long long) TOTAL_D2D, (unsigned long long) TOTAL_KERNEL, (unsigned long long) TOTAL_HOST, \
(unsigned long long) TOTAL_DUAL);}
//TIMER_INIT: absolutely everything needed to start the timers.
//Samples the wall clock into fullExecTimer.timer and stores it, converted to
//nanoseconds (microseconds scaled by 1000), in fullExecTimer.starttime.
//The conversion is done in cl_ulong so it cannot overflow where long is
//32 bits wide.
#define TIMER_INIT { \
    gettimeofday(&fullExecTimer.timer, NULL); \
    fullExecTimer.starttime = (cl_ulong) fullExecTimer.timer.tv_sec * 1000000000ULL \
                            + (cl_ulong) fullExecTimer.timer.tv_usec * 1000ULL; \
}
//TIMER_FINISH: absolutely everything needed to finalize the timers.
//In order: stamps fullExecTimer.endtime with the current wall-clock time in
//nanoseconds, aggregates the timers (simpleNameTally), prints the core totals
//(OCD_PRINT_TIMERS) and the per-name results (simpleNamePrint), then tears
//down the name list and the timer list.
//The time conversion is done in cl_ulong so it cannot overflow where long is
//32 bits wide.
//TODO (carried over from the original): "free all our data structures" --
//destNameList/destTimerList are already called; confirm whether anything else
//still needs freeing.
#define TIMER_FINISH { \
    gettimeofday(&fullExecTimer.timer, NULL); \
    fullExecTimer.endtime = (cl_ulong) fullExecTimer.timer.tv_sec * 1000000000ULL \
                          + (cl_ulong) fullExecTimer.timer.tv_usec * 1000ULL; \
    simpleNameTally(); \
    OCD_PRINT_TIMERS \
    simpleNamePrint(); \
    destNameList(); \
    destTimerList(); \
}
//START_DUAL_TIMER(a, b, n, p): start the composed timer for events a and b.
//  a - the "first" event; its profiling start time becomes starttime
//  b - the second event (END_DUAL_TIMER reads its end time)
//  n - timer name; the pointer is stored, the string is not copied
//  p - lvalue that receives the struct ocdDualTimer *
//a and b are each evaluated once. If getDualTimePtr(a, b) already knows a
//timer, it is reused and its starttime overwritten (through the dual-timer
//view of the union, matching the pointer handed back in p). Otherwise a
//zeroed timer is allocated, filled in, appended with addTimer and started.
//Allocation failure and OpenCL errors terminate the process (exit(1), the
//latter via CHECK_ERROR).
#define START_DUAL_TIMER(a, b, n, p) { \
    cl_event ocd_dt_first_ = (a); \
    cl_event ocd_dt_second_ = (b); \
    void *ocd_dt_found_ = getDualTimePtr(ocd_dt_first_, ocd_dt_second_); \
    if (ocd_dt_found_ == (void *) -1) { \
        struct ocdDualTimer *ocd_dt_new_ = (struct ocdDualTimer *) calloc(1, sizeof(struct ocdDualTimer)); \
        if (ocd_dt_new_ == NULL) { \
            fprintf(stderr, "Timer Error: out of memory in START_DUAL_TIMER!\n"); \
            exit(1); \
        } \
        ocd_dt_new_->type = OCD_TIMER_DUAL; \
        ocd_dt_new_->event[0] = ocd_dt_first_; \
        ocd_dt_new_->event[1] = ocd_dt_second_; \
        ocd_dt_new_->name = (n); \
        addTimer((union ocdInternalTimer *) ocd_dt_new_); \
        (p) = ocd_dt_new_; /*set the return pointer*/ \
        cl_int ocd_dt_err_ = clGetEventProfilingInfo(ocd_dt_first_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ocd_dt_new_->starttime, NULL); \
        CHECK_ERROR(ocd_dt_err_) \
    } else { \
        struct ocdDualTimer *ocd_dt_old_ = &((union ocdInternalTimer *) ocd_dt_found_)->c; \
        (p) = ocd_dt_old_; /*set the return pointer*/ \
        cl_int ocd_dt_err_ = clGetEventProfilingInfo(ocd_dt_first_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ocd_dt_old_->starttime, NULL); \
        CHECK_ERROR(ocd_dt_err_) \
    } \
}
//END_DUAL_TIMER(t): store the profiling end time of t's second event
//(event[1]) in t->endtime.
//  t - pointer expression to a timer; assumed valid. It is parenthesized in
//      the expansion, so END_DUAL_TIMER(&timer) expands correctly.
//Only acts when t->type is OCD_TIMER_DUAL; other types are left untouched.
//An OpenCL error terminates the process via CHECK_ERROR.
#define END_DUAL_TIMER(t) { \
    if ((t)->type == OCD_TIMER_DUAL) { \
        cl_int ocd_edt_err_ = clGetEventProfilingInfo((t)->event[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), (void *) &(t)->endtime, NULL); \
        CHECK_ERROR(ocd_edt_err_) \
    } \
}
#else
//Timers disabled (ENABLE_TIMER not defined): every timer macro expands to
//nothing, so instrumented code compiles unchanged and its arguments are never
//evaluated.
//TIMER_INIT, TIMER_FINISH and OCD_PRINT_TIMERS are stubbed as well, since they
//are the names the enabled branch actually defines; without these stubs any
//code using them fails to compile when timers are off.
#define OCD_INIT
#define OCD_FINISH
#define TIMER_INIT
#define TIMER_FINISH
#define START_TIMER(e, t, n, p)
#define END_TIMER(t)
#define START_DUAL_TIMER(a, b, n, p)
#define END_DUAL_TIMER(t)
#define PRINT_CORE_TIMERS
#define OCD_PRINT_TIMERS
#define START_HOST_TIMER(n, p)
#define END_HOST_TIMER(t)
#endif
//Kernel start/end markers.
//When START_POWER is defined, each macro prints a marker line to stdout;
//otherwise both expand to nothing.
//Note that the trailing semicolon is part of the expansion, so the macros form
//a complete statement on their own.
#ifdef START_POWER
#define START_KERNEL printf("Kernel Start\n");
#define END_KERNEL printf("Kernel END\n");
#else
#define START_KERNEL
#define END_KERNEL
#endif
//Returns an OpenCL device handle for the given platform/device pair.
//NOTE(review): presumably `platform` and `device` are zero-based indices (cf.
//PLATFORM_ID / DEVICE_ID); the behaviour for invalid indices is not visible in
//this header -- confirm in the implementation.
extern cl_device_id GetDevice(int platform, int device);
#ifdef __cplusplus
}
#endif
#endif //FILE