forked from dcampora/cl_forward
-
Notifications
You must be signed in to change notification settings - Fork 1
/
GpuPixelSearchByTriplet.cpp
65 lines (49 loc) · 1.99 KB
/
GpuPixelSearchByTriplet.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include "GpuPixelSearchByTriplet.h"
/**
 * Independent entrypoint: runs the search with verbose logging.
 *
 * Builds a vector of pointers to the elements of input, sets the logger
 * verbosity level to 3, orders the hits with preorder_by_x and forwards
 * everything to gpuPixelSearchByTripletInvocation.
 *
 * @param input  input buffers; the pointers built here refer to its elements,
 *               so it must stay alive for the duration of the call
 * @param output passed through to gpuPixelSearchByTripletInvocation
 * @return whatever gpuPixelSearchByTripletInvocation returns
 */
int independent_execute(
    const std::vector<std::vector<uint8_t> > & input,
    std::vector<std::vector<uint8_t> > & output) {
  // One pointer slot per input element, sized up front.
  std::vector<const std::vector<uint8_t>* > converted_input(input.size());

  // Index with size_t: input.size() is unsigned, so an int index would
  // trigger a signed/unsigned comparison.
  for (std::size_t i = 0; i < input.size(); ++i) {
    converted_input[i] = &input[i];
  }

  std::cout << std::fixed << std::setprecision(2);
  logger::ll.verbosityLevel = 3;

  // Order input hits by X
  preorder_by_x(converted_input);

  return gpuPixelSearchByTripletInvocation(converted_input, output);
}
/**
 * Reports, through the DEBUG stream, how many entries output holds.
 * @param output the result container to report on
 */
void independent_post_execute(const std::vector<std::vector<uint8_t> > & output) {
  const std::size_t entry_count = output.size();
  DEBUG << std::endl << "Size of output: " << entry_count << " entries" << std::endl;
}
/**
 * Silent entrypoint: sets the logger verbosity level to 0 and forwards
 * input and output unchanged to gpuPixelSearchByTripletInvocation.
 * @param input  pointers to the input buffers
 * @param output passed through to gpuPixelSearchByTripletInvocation
 * @return whatever gpuPixelSearchByTripletInvocation returns
 */
int gpuPixelSearchByTriplet(
    const std::vector<const std::vector<uint8_t>* > & input,
    std::vector<std::vector<uint8_t> > & output) {
  // Silence the logger, then fix the floating-point format used on std::cout.
  logger::ll.verbosityLevel = 0;
  std::cout << std::fixed << std::setprecision(2);

  const int status = gpuPixelSearchByTripletInvocation(input, output);
  return status;
}
/**
 * Common entrypoint for Gaudi and non-Gaudi.
 *
 * Resizes output to hold one entry per input event, then hands the events to
 * invokeParallelSearch in consecutive batches of at most 16000 events.
 *
 * @param input  pointers to the per-event input buffers
 * @param output resized to input.size() and passed to invokeParallelSearch
 * @return always 0; nothing returned by invokeParallelSearch is inspected
 */
int gpuPixelSearchByTripletInvocation(
    const std::vector<const std::vector<uint8_t>* > & input,
    std::vector<std::vector<uint8_t> > & output) {
  DEBUG << "Invoking gpuPixelSearchByTriplet with " << input.size() << " events" << std::endl;

  // One output slot per input event
  output.resize(input.size());

  // Execute maximum n number of events every time.
  // All batching arithmetic is done in size_t so that it matches the unsigned
  // type of input.size() (no signed/unsigned comparison, no implicit narrowing).
  const std::size_t max_events_to_process_per_kernel = 16000;
  const std::size_t total_events = input.size();

  for (std::size_t first_event = 0; first_event < total_events;
       first_event += max_events_to_process_per_kernel) {
    const std::size_t remaining_events = total_events - first_event;
    const std::size_t events_to_process =
        remaining_events > max_events_to_process_per_kernel
            ? max_events_to_process_per_kernel
            : remaining_events;

    // Explicit casts keep the argument types (int, int) this call has always used.
    invokeParallelSearch(static_cast<int>(first_event),
                         static_cast<int>(events_to_process),
                         input, output);
  }

  return 0;
}