-
Notifications
You must be signed in to change notification settings - Fork 0
/
pmpp__histogram.cu
94 lines (81 loc) · 2.88 KB
/
pmpp__histogram.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include "pmpp__histogram.h"
// Histogram kernel with shared-memory privatization and per-thread run
// aggregation.
//
// Every character c in ['a', 'z'] is counted in bin (c - 'a') / 4, i.e. bins
// 0..6; all other characters are ignored. Each block builds a private
// histogram in dynamic shared memory and then atomically adds it to `output`.
// While scanning, a thread collapses a run of consecutive hits on the same bin
// into one atomicAdd instead of issuing one atomic per character.
//
// Parameters:
//   input         - device buffer holding `input_length` characters.
//   input_length  - number of characters to scan.
//   output        - device histogram; the kernel only ADDS to it and never
//                   clears it, so the caller decides its starting contents.
//   output_length - number of bins zeroed in shared memory and merged into
//                   `output`.
//
// Launch requirements:
//   - 1D grid and 1D blocks (only the .x components are used).
//   - Dynamic shared memory of at least output_length * sizeof(unsigned int).
//   - The bin index is not checked against output_length, so output_length
//     must cover every bin that can be produced (7 for 'a'..'z').
__global__
void pmpp__histogram_aggregated_kernel(char* input, const int input_length, int* output, const int output_length) {
    extern __shared__ unsigned int agg_histo_s[];
    unsigned int tid = blockIdx.x*blockDim.x + threadIdx.x;

    // Cooperatively zero the block-private histogram.
    for (unsigned int binIdx = threadIdx.x; binIdx < output_length; binIdx += blockDim.x) {
        agg_histo_s[binIdx] = 0;
    }
    __syncthreads();

    // prev_index is the bin of the run currently being aggregated (-1 = no run
    // yet); accumulator is the number of hits in that run not yet written out.
    int prev_index = -1;
    unsigned int accumulator = 0;
    for (unsigned int i = tid; i < input_length; i += blockDim.x*gridDim.x) {
        int alphabet_position = input[i] - 'a';
        if (alphabet_position >= 0 && alphabet_position < 26) {
            int curr_index = alphabet_position/4;
            if (curr_index == prev_index) {
                // Same bin as the previous hit: extend the run, no atomic.
                accumulator++;
            }
            else {
                // Bin changed: write the finished run to the bin it belongs
                // to (the previous one), then start a new run of length 1.
                if (accumulator > 0) {
                    atomicAdd(&(agg_histo_s[prev_index]), accumulator);
                }
                accumulator = 1;
                prev_index = curr_index;
            }
        }
    }
    // Flush the last run; without this the trailing hits would be lost.
    if (accumulator > 0) {
        atomicAdd(&(agg_histo_s[prev_index]), accumulator);
    }
    __syncthreads();

    // Merge the block-private histogram into the global one.
    for (unsigned int binIdx = threadIdx.x; binIdx < output_length; binIdx += blockDim.x) {
        atomicAdd(&(output[binIdx]), agg_histo_s[binIdx]);
    }
}
// Histogram kernel using a block-private copy of the bins in shared memory.
//
// Characters in ['a', 'z'] are counted in bin (c - 'a') / 4; everything else
// is skipped. Each block accumulates into its own shared-memory histogram with
// atomics and, once the whole block is done, atomically adds that histogram
// into `output`.
//
// Parameters:
//   input         - device buffer holding `input_length` characters.
//   input_length  - number of characters to scan.
//   output        - device histogram; it is only added to, never cleared here.
//   output_length - number of bins zeroed in shared memory and merged into
//                   `output`.
//
// Launch requirements:
//   - 1D grid and 1D blocks (only the .x components are used).
//   - Dynamic shared memory of at least output_length * sizeof(unsigned int).
//   - The bin index is not checked against output_length, so output_length
//     must cover every bin that can be produced (7 for 'a'..'z').
__global__
void pmpp__histogram_privatized_kernel(char* input, const int input_length, int* output, const int output_length) {
    extern __shared__ unsigned int priv_histo_s[];

    const unsigned int local_id = threadIdx.x;
    const unsigned int block_threads = blockDim.x;
    const unsigned int grid_threads = blockDim.x*gridDim.x;

    // Phase 1: clear the private bins, block_threads entries at a time.
    unsigned int bin = local_id;
    while (bin < output_length) {
        priv_histo_s[bin] = 0;
        bin += block_threads;
    }
    __syncthreads();

    // Phase 2: grid-stride walk over the input, counting into shared memory.
    unsigned int pos = blockIdx.x*blockDim.x + threadIdx.x;
    while (pos < input_length) {
        const int letter = input[pos] - 'a';
        const bool outside_alphabet = (letter < 0) || (letter >= 26);
        if (!outside_alphabet) {
            atomicAdd(&priv_histo_s[letter/4], 1);
        }
        pos += grid_threads;
    }
    __syncthreads();

    // Phase 3: fold this block's counts into the global histogram.
    for (bin = local_id; bin < output_length; bin += block_threads) {
        atomicAdd(&output[bin], priv_histo_s[bin]);
    }
}
// Histogram kernel with interleaved partitioning of the input.
//
// Thread t starts at element t and advances by the total number of threads in
// the grid. Each character in ['a', 'z'] atomically increments
// output[(c - 'a') / 4] directly in the `output` array; other characters are
// skipped.
//
// Parameters:
//   input  - device buffer holding `length` characters.
//   length - number of characters to scan.
//   output - device histogram; it is only incremented, never cleared here, and
//            is indexed with bins 0..6 without any bounds check.
//
// Launch requirements: 1D grid and 1D blocks (only .x components are used).
__global__
void pmpp__histogram_with_interleaved_partitioning_kernel(char *input, const int length, int *output){
    const unsigned int stride = blockDim.x*gridDim.x;
    unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x;

    while (idx < length) {
        const int letter = input[idx] - 'a';
        const bool is_lowercase = (letter >= 0) && (letter < 26);
        if (is_lowercase) {
            atomicAdd(output + letter/4, 1);
        }
        idx += stride;
    }
}
// Histogram kernel with block (contiguous) partitioning of the input.
//
// The input is cut into equally sized contiguous sections, one per thread:
// section_size = ceil(length / total_threads), and thread t handles
// [t * section_size, min((t + 1) * section_size, length)). Each character in
// ['a', 'z'] atomically increments output[(c - 'a') / 4] directly in the
// `output` array; other characters are skipped.
//
// Parameters:
//   input  - device buffer holding `length` characters.
//   length - number of characters to scan; values <= 0 make the kernel a no-op.
//   output - device histogram; it is only incremented, never cleared here, and
//            is indexed with bins 0..6 without any bounds check.
//
// Launch requirements: 1D grid and 1D blocks (only .x components are used).
__global__
void pmpp__histogram_with_block_partitioning_kernel(char *input, const int length, int *output){
    if (length <= 0) {
        return;
    }

    // 64-bit index math: thread_id * section_size can exceed INT_MAX for very
    // large inputs, which would wrap to a negative index in 32-bit arithmetic.
    const long long total_threads = (long long)blockDim.x * gridDim.x;
    const long long thread_id = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    // Integer ceil-division; avoids double-precision math in device code.
    const long long section_size = (length + total_threads - 1) / total_threads;

    const long long start = thread_id * section_size;
    long long end = start + section_size;
    if (end > length) {
        // Clamp once instead of re-checking the bound on every iteration.
        end = length;
    }

    for (long long k = start; k < end; k++) {
        int alphabet_position = input[k] - 'a';
        if (alphabet_position >= 0 && alphabet_position < 26){
            atomicAdd(&(output[alphabet_position/4]), 1);
        }
    }
}
// Sequential CPU histogram.
//
// Scans the first `length` characters of `input`; each character in
// ['a', 'z'] increments output[(c - 'a') / 4] (bins 0..6), anything else is
// ignored. `output` is only incremented, never cleared, so counts are added on
// top of whatever it already holds. A `length` of zero or less does nothing.
void pmpp__histogram_host(char *input, const int length, int *output){
    const char *cursor = input;
    for (int remaining = length; remaining > 0; --remaining, ++cursor) {
        const int letter = *cursor - 'a';
        if (letter < 0 || letter >= 26) {
            continue;  // not a lowercase letter
        }
        output[letter / 4] += 1;
    }
}