-
Notifications
You must be signed in to change notification settings - Fork 8
/
ludecomposition.cl
63 lines (52 loc) · 1.52 KB
/
ludecomposition.cl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include "FGPUlib.c"
#include "divsf3.c"
#include "subsf3.c"
#include "mulsf3.c"
__kernel void ludecomposition_L_pass(__global float *mat,__global float *L, unsigned size, unsigned k)
{
// i should have an offset of k+1
unsigned i = get_global_id(0);
if (i < size) {
float tmp = mat[i*size + k] / mat[k*size + k];
L[i*size + k] = tmp;
if (i == k+1) {
L[k*size + k] = 1;
}
}
}
__kernel void ludecomposition_U_pass(__global float *mat,__global float *L, unsigned size, unsigned k)
{
// i & j should have an offset of k+1
unsigned i = get_global_id(1);
unsigned j = get_global_id(0);
bool write = (i < size) & (j != k) & (j < size);
// mat[i*size+j] = tmp;
// if(write) {
float tmp = L[i*size + k];
float res = mat[i*size+j] - tmp*mat[k*size + j];
mat[i*size+j] = res;
// }
}
// This kernel decomposes a matrix into lower and upper parts
// The upper part will overwrite the oroginal matrix
// The lower one will be stored in L
// __kernel void ludecomposition_pass(__global float *mat,__global float *L, unsigned size, unsigned k)
// {
// // i & j should have an offset of k+1
// unsigned i = get_global_id(1);
// unsigned j = get_global_id(0);
//
// float tmp = mat[i*size + k] / mat[k*size + k];
// float res = mat[i*size+j] - tmp*mat[k*size + j];
//
// if (i < size) {
// if (j == k) {
// L[i*size + k] = tmp;
// if (i == k+1) {
// L[k*size + k] = 1;
// }
// } else if(j < size) {
// mat[i*size+j] = res;
// }
// }
// }