Skip to content

Commit

Permalink
fix cuda
Browse files (browse the repository at this point in the history)
  • Loading branch information
lehner committed Oct 6, 2023
1 parent 0c8f23a commit 2e7177b
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 23 deletions.
2 changes: 1 addition & 1 deletion lib/cgpt/lib/stencil/common.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,7 +50,7 @@ typename vobj::scalar_object coalescedReadGeneralPermute(const vobj & __restrict
} else { \
obj = coalescedRead(view[_SE->_offset]); \
} \
acceleratorSynchronize(); \
acceleratorSynchronise(); \
if (do_adj) \
obj = adj(obj); \
}
Expand Down
19 changes: 11 additions & 8 deletions lib/cgpt/lib/stencil/matrix.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,20 +96,23 @@ class cgpt_stencil_matrix : public cgpt_stencil_matrix_base {

int nd = fields[0].Grid()->Nd();

int _npb = n_code_parallel_blocks;
int _npbs = n_code_parallel_block_size;

auto sview = stencil.View();

accelerator_for(ss_block,fields[0].Grid()->oSites() * n_code_parallel_blocks,M::Nsimd(),{

auto ss = ss_block / n_code_parallel_blocks;
auto oblock = ss_block % n_code_parallel_blocks;
accelerator_for(ss_block,fields[0].Grid()->oSites() * _npb,T::Nsimd(),{

auto ss = ss_block / _npb;
auto oblock = ss_block % _npb;

for (int iblock=0;iblock<n_code_parallel_block_size;iblock++) {
for (int iblock=0;iblock<_npbs;iblock++) {

int i = oblock * n_code_parallel_block_size + iblock;
int i = oblock * _npbs + iblock;

obj_t t;

auto _f0 = &p_code[i].factor[0];
const auto _f0 = &p_code[i].factor[0];
fetch(t, _f0->point, ss, fields_v[_f0->index], _f0->adj);

for (int j=1;j<p_code[i].size;j++) {
Expand All @@ -126,7 +129,7 @@ class cgpt_stencil_matrix : public cgpt_stencil_matrix_base {
}

});

VECTOR_VIEW_CLOSE(fields_v);
}

Expand Down
17 changes: 10 additions & 7 deletions lib/cgpt/lib/stencil/matrix_matrix_vector.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,16 +100,19 @@ class cgpt_stencil_matrix_matrix_vector : public cgpt_stencil_matrix_matrix_vect

int nd = matrix_fields[0].Grid()->Nd();

int _npb = n_code_parallel_blocks;
int _npbs = n_code_parallel_block_size;

auto sview = stencil.View();

accelerator_for(ss_block,matrix_fields[0].Grid()->oSites() * _npb,M::Nsimd(),{

auto ss = ss_block / _npb;
auto oblock = ss_block % _npb;

accelerator_for(ss_block,matrix_fields[0].Grid()->oSites() * n_code_parallel_blocks,M::Nsimd(),{

auto ss = ss_block / n_code_parallel_blocks;
auto oblock = ss_block % n_code_parallel_blocks;

for (int iblock=0;iblock<n_code_parallel_block_size;iblock++) {
for (int iblock=0;iblock<_npbs;iblock++) {

int i = oblock * n_code_parallel_block_size + iblock;
int i = oblock * _npbs + iblock;
obj_v_t t;

fetch(t, p_code[i].source_point, ss, fields_v_v[p_code[i].source], 0);
Expand Down
17 changes: 10 additions & 7 deletions lib/cgpt/lib/stencil/matrix_vector.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -103,16 +103,19 @@ class cgpt_stencil_matrix_vector : public cgpt_stencil_matrix_vector_base {

int nd = matrix_fields[0].Grid()->Nd();

int _npb = n_code_parallel_blocks;
int _npbs = n_code_parallel_block_size;

auto sview = stencil.View();

accelerator_for(ss_block,matrix_fields[0].Grid()->oSites() * _npb,M::Nsimd(),{

auto ss = ss_block / _npb;
auto oblock = ss_block % _npb;

accelerator_for(ss_block,matrix_fields[0].Grid()->oSites() * n_code_parallel_blocks,M::Nsimd(),{

auto ss = ss_block / n_code_parallel_blocks;
auto oblock = ss_block % n_code_parallel_blocks;

for (int iblock=0;iblock<n_code_parallel_block_size;iblock++) {
for (int iblock=0;iblock<_npbs;iblock++) {

int i = oblock * n_code_parallel_block_size + iblock;
int i = oblock * _npbs + iblock;
obj_v_t t;

fetch(t, p_code[i].source_point, ss, fields_v_v[p_code[i].source], 0);
Expand Down

0 comments on commit 2e7177b

Please sign in to comment.