forked from pnkapadia6/fast-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
search_string_parallel.cu
164 lines (135 loc) · 5.45 KB
/
search_string_parallel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
// Parallel implementation of string searching
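// '?' in the keyword matches any single character: a?b gives -> acb, abb, ...
// case insensitive on the text side (text is folded to lower case; give the keyword in lower case)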
// using local memory
// blocks & threads
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <math.h>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
/* M - window size */
#define M 20
/* Folds an ASCII upper-case letter to lower case; any other byte is returned unchanged. */
static __device__ __forceinline__ char asciiToLower(char c)
{
    return (c >= 'A' && c <= 'Z') ? (char)(c + 32) : c;
}

/*
 * Counts, per thread, the keyword occurrences that START inside that thread's
 * window of the text.
 *
 * Launch layout: 1-D grid of 1-D blocks. The thread with global index i owns
 * the M start positions data[i*M] .. data[i*M + M - 1] and stores its count in
 * result[i].
 *
 * Matching rules:
 *   - text characters 'A'..'Z' are folded to lower case before comparing; the
 *     keyword is compared exactly as given
 *   - '?' in the keyword matches any single text character
 *   - a literal keyword character after the first never matches ' ' or '\n'
 *
 * Preconditions:
 *   - result holds gridDim.x * blockDim.x ints
 *   - data is readable for gridDim.x * blockDim.x * M + keyword_len - 1 bytes
 *     (a match starting at the end of a window extends into the next one)
 *   - keyword holds keyword_len readable chars; it is never written
 *
 * Parameters:
 *   result      - per-thread match counts (output)
 *   data        - text to search
 *   keyword     - pattern, may contain '?' wildcards
 *   keyword_len - number of characters in keyword; <= 0 yields a count of 0
 */
__global__ void searchKeywordKernel(int *result, char *data, char *keyword,int keyword_len)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    int matches = 0;

    if (keyword_len > 0)
    {
        // size_t offset: i * M would overflow int on very large inputs.
        const char *window = data + (size_t)i * M;
        const char first = keyword[0];

        // Only positions 0 .. M-1 belong to this thread; position M is the
        // first start position of the next thread's window.
        for (int j = 0; j < M; j++)
        {
            if (first != '?' && asciiToLower(window[j]) != first)
                continue;

            // Stays true when there is nothing left to check (single-character
            // keyword, or only wildcards after the first character).
            bool matched = true;
            for (int k = 1; k < keyword_len; k++)
            {
                const char want = keyword[k];
                if (want == '?')
                    continue;

                const char have = asciiToLower(window[j + k]);
                if (have != want || have == ' ' || have == '\n')
                {
                    matched = false;
                    break;
                }
            }

            if (matched)
                matches++;
        }
    }

    result[i] = matches;
}
/* Prints a readable message and terminates when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        std::exit(EXIT_FAILURE);
    }
}

/*
 * Uploads the text and keyword, launches searchKeywordKernel with one thread
 * per M characters of text, prints the kernel time and returns the sum of all
 * per-thread match counts.
 *
 * Returns 0 without touching the GPU when the text or the keyword is empty,
 * because no valid launch geometry exists for an empty input.
 */
static int countMatchesOnDevice(const std::string &text, const char *keyword, size_t keyword_len)
{
    const size_t text_len = text.size();
    if (text_len == 0 || keyword_len == 0)
        return 0;

    // Launch geometry: at most 1024 threads per block, M characters per thread.
    // Integer ceiling division avoids float rounding on large files.
    const size_t chars_per_full_block = (size_t)1024 * M;
    const int num_blocks = (int)((text_len + chars_per_full_block - 1) / chars_per_full_block);
    const size_t chars_per_thread_row = (size_t)M * num_blocks;
    const int num_threads = (int)((text_len + chars_per_thread_row - 1) / chars_per_thread_row);
    const size_t total_threads = (size_t)num_blocks * num_threads;

    const size_t result_bytes = total_threads * sizeof(int);
    // Sized so that every launched thread's window, plus the keyword_len
    // characters a match may extend past it, lies inside the allocation.
    const size_t padded_text_bytes = total_threads * M + keyword_len + 1;

    char *dev_data = 0;
    char *dev_keyword = 0;
    int *dev_result = 0;
    checkCuda(cudaMalloc((void**) &dev_result, result_bytes), "cudaMalloc(result)");
    checkCuda(cudaMalloc((void**) &dev_data, padded_text_bytes), "cudaMalloc(data)");
    checkCuda(cudaMalloc((void**) &dev_keyword, keyword_len + 1), "cudaMalloc(keyword)");

    // Zero the padding so the tail of the buffer never holds stale bytes.
    checkCuda(cudaMemset(dev_data, 0, padded_text_bytes), "cudaMemset(data)");
    checkCuda(cudaMemcpy(dev_data, text.data(), text_len, cudaMemcpyHostToDevice), "cudaMemcpy(data)");
    // Copy the terminating NUL too, so the device buffer holds a complete C string.
    checkCuda(cudaMemcpy(dev_keyword, keyword, keyword_len + 1, cudaMemcpyHostToDevice), "cudaMemcpy(keyword)");
    checkCuda(cudaMemset(dev_result, 0, result_bytes), "cudaMemset(result)");

    cudaEvent_t k_start, k_stop;
    checkCuda(cudaEventCreate(&k_start), "cudaEventCreate");
    checkCuda(cudaEventCreate(&k_stop), "cudaEventCreate");

    // Time the kernel only: the stop event is recorded before the copy back.
    checkCuda(cudaEventRecord(k_start, 0), "cudaEventRecord");
    searchKeywordKernel<<<num_blocks, num_threads>>>(dev_result, dev_data, dev_keyword, (int) keyword_len);
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaEventRecord(k_stop, 0), "cudaEventRecord");
    checkCuda(cudaEventSynchronize(k_stop), "kernel execution");

    float kernel_time;
    checkCuda(cudaEventElapsedTime(&kernel_time, k_start, k_stop), "cudaEventElapsedTime");
    printf("\nKernel time: %f msec\n",kernel_time);

    // Copy result from GPU buffer to host memory.
    std::vector<int> host_result(total_threads, 0);
    checkCuda(cudaMemcpy(host_result.data(), dev_result, result_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(result)");

    int total_matches = 0;
    for (size_t idx = 0; idx < total_threads; idx++)
    {
        if (host_result[idx] > 0)
            total_matches += host_result[idx];
    }

    checkCuda(cudaEventDestroy(k_start), "cudaEventDestroy");
    checkCuda(cudaEventDestroy(k_stop), "cudaEventDestroy");
    checkCuda(cudaFree(dev_result), "cudaFree(result)");
    checkCuda(cudaFree(dev_data), "cudaFree(data)");
    checkCuda(cudaFree(dev_keyword), "cudaFree(keyword)");
    return total_matches;
}

/*
 * Reads text_150.txt, counts the occurrences of the keyword "ab" on the GPU
 * and prints the file-read time, kernel time, match count and total time.
 *
 * Returns 0 on success, 1 when the input file cannot be opened. Any CUDA
 * failure terminates the program with a diagnostic on stderr.
 */
int main()
{
    cudaEvent_t t_start, t_stop, c_start, c_stop;
    checkCuda(cudaEventCreate(&t_start), "cudaEventCreate");
    checkCuda(cudaEventCreate(&t_stop), "cudaEventCreate");
    checkCuda(cudaEventRecord(t_start, 0), "cudaEventRecord");
    checkCuda(cudaEventCreate(&c_start), "cudaEventCreate");
    checkCuda(cudaEventCreate(&c_stop), "cudaEventCreate");
    checkCuda(cudaEventRecord(c_start, 0), "cudaEventRecord");

    // Load the whole input file into memory.
    std::ifstream t("text_150.txt");
    if (!t)
    {
        fprintf(stderr, "Could not open text_150.txt\n");
        return 1;
    }
    std::stringstream buffer;
    buffer << t.rdbuf();
    const std::string data_s = buffer.str();
    t.close();

    checkCuda(cudaEventRecord(c_stop, 0), "cudaEventRecord");
    checkCuda(cudaEventSynchronize(c_stop), "cudaEventSynchronize");
    float copy_time;
    checkCuda(cudaEventElapsedTime(&copy_time, c_start, c_stop), "cudaEventElapsedTime");
    printf("\nCopy time: %f msec\n",copy_time);

    const char *keyword = "ab";
    const size_t keyword_len = strlen(keyword);

    const int total_matches = countMatchesOnDevice(data_s, keyword, keyword_len);
    printf("\n");
    printf("Total matches = %d\n", total_matches);

    checkCuda(cudaEventRecord(t_stop, 0), "cudaEventRecord");
    checkCuda(cudaEventSynchronize(t_stop), "cudaEventSynchronize");
    float total_time;
    checkCuda(cudaEventElapsedTime(&total_time, t_start, t_stop), "cudaEventElapsedTime");
    printf("\nTotal time: %f msec\n",total_time);

    checkCuda(cudaEventDestroy(t_start), "cudaEventDestroy");
    checkCuda(cudaEventDestroy(t_stop), "cudaEventDestroy");
    checkCuda(cudaEventDestroy(c_start), "cudaEventDestroy");
    checkCuda(cudaEventDestroy(c_stop), "cudaEventDestroy");
    return 0;
}