This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

Commit

Fixed clang-format
iomaganaris authored and olupton committed Dec 23, 2021
1 parent 6452837 commit 79d0cfc
Showing 6 changed files with 43 additions and 22 deletions.
14 changes: 10 additions & 4 deletions coreneuron/network/netcvode.cpp
@@ -533,8 +533,10 @@ void NetCvode::check_thresh(NrnThread* nt) { // for default method

nrn_pragma_acc(parallel loop present(
nt [0:1], presyns_helper [0:nt->n_presyn], presyns [0:nt->n_presyn], actual_v [0:nt->end])
- copy(net_send_buf_count) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
- nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+ copy(net_send_buf_count) if (nt->compute_gpu)
+ async(nt->streams[nt->stream_id]))
+ nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count)
+ if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
for (int i = 0; i < nt->ncell; ++i) {
PreSyn* ps = presyns + i;
PreSynHelper* psh = presyns_helper + i;
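Note: the hunk above shows the pattern this commit reflows everywhere: one OpenACC directive and one OpenMP offload directive annotate the same loop, with async(nt->streams[nt->stream_id]) on the OpenACC side and depend(inout: nt->streams[nt->stream_id]) nowait on the OpenMP side tying the kernel to a per-thread stream. A minimal standalone sketch of that pattern follows; the names are illustrative, and it assumes (as the nrn_pragma_acc/nrn_pragma_omp wrappers suggest) that only one of the two directives is active in any given build.

    // Sketch (illustrative, not CoreNEURON source): the dual-annotation
    // async pattern. Compile with an OpenACC compiler or an OpenMP offload
    // compiler; the pragma for the other backend is then ignored.
    #include <cstdio>

    int main() {
        const int n = 1024;
        static double v[1024];
        const int queue = 1;  // stands in for nt->streams[nt->stream_id]

        #pragma acc parallel loop copy(v[0:n]) async(queue)
        #pragma omp target teams distribute parallel for map(tofrom: v[0:n]) nowait
        for (int i = 0; i < n; ++i) {
            v[i] = 2.0 * i;
        }

        #pragma acc wait(queue)  // cf. nrn_pragma_acc(wait async(...))
        #pragma omp taskwait     // cf. nrn_pragma_omp(taskwait)
        printf("v[42] = %g\n", v[42]);
        return 0;
    }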
@@ -569,9 +571,13 @@ void NetCvode::check_thresh(NrnThread* nt) { // for default method
#ifdef CORENEURON_ENABLE_GPU
int* nsbuffer = nt->_net_send_buffer;
#endif
- nrn_pragma_acc(update host(nsbuffer [0:nt->_net_send_buffer_cnt]) async(nt->streams[nt->stream_id]))
+ nrn_pragma_acc(update host(nsbuffer [0:nt->_net_send_buffer_cnt])
+ async(nt->streams[nt->stream_id]))
nrn_pragma_acc(wait async(nt->streams[nt->stream_id]))
- nrn_pragma_omp(target update from(nsbuffer [0:nt->_net_send_buffer_cnt]) depend(inout: nt->streams[nt->stream_id]) nowait)
+ // clang-format off
+ nrn_pragma_omp(target update from(nsbuffer [0:nt->_net_send_buffer_cnt])
+ depend(inout: nt->streams[nt->stream_id]) nowait)
+ // clang-format on
nrn_pragma_omp(taskwait)
}
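Note: the // clang-format off ... // clang-format on pair introduced in this hunk is clang-format's standard escape hatch: everything between the markers is left untouched, so the hand-wrapped macro arguments are not re-joined into one long line the next time the formatter runs. A tiny compilable illustration, with a no-op stand-in macro rather than the real nrn_pragma_omp:

    // demo.cpp: the markers fence a region off from clang-format, so the
    // deliberate line breaks inside the macro call survive reformatting.
    #define my_pragma(x)  // no-op stand-in for nrn_pragma_omp

    int main() {
        // clang-format off
        my_pragma(target update from(buf [0:cnt])
                      depend(inout: stream) nowait)
        // clang-format on
        return 0;
    }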

5 changes: 4 additions & 1 deletion coreneuron/network/partrans.cpp
@@ -63,7 +63,10 @@ void nrnmpi_v_transfer() {
}
nrn_pragma_acc(update host(src_gather [0:n_src_gather]) if (nt->compute_gpu)
async(nt->streams[nt->stream_id]))
- nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+ // clang-format off
+ nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu)
+ depend(inout: nt->streams[nt->stream_id]) nowait)
+ // clang-format on
}

// copy gathered source values to outsrc_buf_
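Note: OpenACC update host(...) and OpenMP target update from(...) are the two spellings of the same device-to-host copy, and both are queued asynchronously here (async vs. depend/nowait). A minimal sketch of that copy-back pattern, with illustrative names:

    // Sketch (illustrative names): asynchronous device-to-host update in
    // both models, ordered against the producing kernel, then a sync point
    // before the host reads the gathered values.
    #include <cstdio>

    int main() {
        const int n = 8;
        double src_gather[8] = {0};

        #pragma acc enter data copyin(src_gather[0:n])
        #pragma omp target enter data map(to: src_gather[0:n])

        #pragma acc parallel loop present(src_gather[0:n]) async(1)
        #pragma omp target teams distribute parallel for depend(inout: src_gather) nowait
        for (int i = 0; i < n; ++i) {
            src_gather[i] = static_cast<double>(i) * i;
        }

        #pragma acc update host(src_gather[0:n]) async(1)
        #pragma omp target update from(src_gather[0:n]) depend(inout: src_gather) nowait
        #pragma acc wait(1)
        #pragma omp taskwait

        printf("src_gather[7] = %g\n", src_gather[7]);
        return 0;
    }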
20 changes: 12 additions & 8 deletions coreneuron/permute/cellorder.cpp
@@ -600,14 +600,18 @@ void solve_interleaved2(int ith) {
defined(_OPENACC)
int nstride = stridedispl[nwarp];
#endif
- nrn_pragma_acc(parallel loop gang vector vector_length(
-     warpsize) present(nt [0:1],
-                       strides [0:nstride],
-                       ncycles [0:nwarp],
-                       stridedispl [0:nwarp + 1],
-                       rootbegin [0:nwarp + 1],
-                       nodebegin [0:nwarp + 1]) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
- nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+ nrn_pragma_acc(parallel loop gang vector vector_length(warpsize)
+     present(nt [0:1],
+             strides [0:nstride],
+             ncycles [0:nwarp],
+             stridedispl [0:nwarp + 1],
+             rootbegin [0:nwarp + 1],
+             nodebegin [0:nwarp + 1]) if (nt->compute_gpu)
+     async(nt->streams[nt->stream_id]))
+ // clang-format off
+ nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)
+ depend(inout: nt->streams[nt->stream_id]) nowait)
+ // clang-format on
for (int icore = 0; icore < ncore; ++icore) {
int iwarp = icore / warpsize; // figure out the >> value
int ic = icore & (warpsize - 1); // figure out the & mask
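Note: the two comments above rely on warpsize being a power of two, so that icore / warpsize is a shift and icore % warpsize is the bitmask actually written. A quick standalone check of the decomposition:

    // Check: for a power-of-two warpsize, division is a shift and modulo
    // is the mask used in the loop above.
    #include <cassert>

    int main() {
        const int warpsize = 32;
        for (int icore = 0; icore < 1024; ++icore) {
            int iwarp = icore / warpsize;     // same as icore >> 5
            int ic = icore & (warpsize - 1);  // same as icore % warpsize
            assert(iwarp * warpsize + ic == icore);
        }
        return 0;
    }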
11 changes: 8 additions & 3 deletions coreneuron/sim/fadvance_core.cpp
@@ -317,7 +317,10 @@ void nrncore2nrn_send_values(NrnThread* nth) {
double* gather_i = tr->gather[i];
nrn_pragma_acc(update self(gather_i [0:1]) if (nth->compute_gpu)
async(nth->streams[nth->stream_id]))
- nrn_pragma_omp(target update from(gather_i [0:1]) if (nth->compute_gpu) depend(inout: nth->streams[nth->stream_id]) nowait)
+ // clang-format off
+ nrn_pragma_omp(target update from(gather_i [0:1]) if (nth->compute_gpu)
+ depend(inout: nth->streams[nth->stream_id]) nowait)
+ // clang-format on
}
nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
for (int i = 0; i < tr->n_trajec; ++i) {
@@ -341,7 +344,8 @@ static void* nrn_fixed_step_thread(NrnThread* nth) {
if (nth->ncell) {
/*@todo: do we need to update nth->_t on GPU: Yes (Michael, but can
launch kernel) */
- nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
+ nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu)
+ async(nth->streams[nth->stream_id]))
nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
fixed_play_continuous(nth);
Expand Down Expand Up @@ -377,7 +381,8 @@ void* nrn_fixed_step_lastpart(NrnThread* nth) {

if (nth->ncell) {
/*@todo: do we need to update nth->_t on GPU */
- nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
+ nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu)
+ async(nth->streams[nth->stream_id]))
nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
fixed_play_continuous(nth);
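Note: both hunks in this file push the scalar nth->_t to the device. The OpenACC pair is asynchronous followed by an explicit wait; the OpenMP target update to(...) carries no nowait and is therefore synchronous. A minimal sketch of that scalar update, with illustrative names:

    // Sketch (illustrative names): pushing a host-side scalar to the device.
    #include <cstdio>

    int main() {
        double t = 0.0;
        #pragma acc enter data copyin(t)
        #pragma omp target enter data map(to: t)

        t += 0.025;  // host advances the time step
        #pragma acc update device(t) async(1)
        #pragma acc wait(1)
        #pragma omp target update to(t)  // synchronous: no nowait

        printf("host t = %g\n", t);
        return 0;
    }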
9 changes: 5 additions & 4 deletions coreneuron/sim/multicore.hpp
@@ -130,10 +130,11 @@ struct NrnThread: public MemoryManaged {

NrnThreadBAList* tbl[BEFORE_AFTER_SIZE]; /* wasteful since almost all empty */

- int shadow_rhs_cnt = 0; /* added to facilitate the NrnThread transfer to GPU */
- int compute_gpu = 0; /* define whether to compute with gpus */
- int stream_id = 0; /* define where the kernel will be launched on GPU stream */
- std::vector<int> streams; /* vector of stream ids needed for async execution of OpenMP in multiple streams */
+ int shadow_rhs_cnt = 0; /* added to facilitate the NrnThread transfer to GPU */
+ int compute_gpu = 0; /* define whether to compute with gpus */
+ int stream_id = 0; /* define where the kernel will be launched on GPU stream */
+ std::vector<int> streams; /* vector of stream ids needed for async execution of OpenMP in
+ multiple streams */
int _net_send_buffer_size = 0;
int _net_send_buffer_cnt = 0;
int* _net_send_buffer = nullptr;
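Note: the new streams member gives each NrnThread its own integer queue id, so kernels and updates from one thread serialise on one stream (via the async/depend clauses above) while different threads can overlap. This commit does not show how the ids are assigned; a purely hypothetical sketch:

    // Hypothetical (not from this commit): one queue id per thread,
    // skipping 0, which is often the default/synchronous queue.
    #include <vector>

    struct StreamTable {
        std::vector<int> streams;
        explicit StreamTable(int nthread) {
            streams.reserve(nthread);
            for (int i = 0; i < nthread; ++i) {
                streams.push_back(i + 1);
            }
        }
    };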
6 changes: 4 additions & 2 deletions coreneuron/sim/treeset_core.cpp
@@ -152,8 +152,10 @@ static void nrn_lhs(NrnThread* _nt) {
so here we transform so it only has membrane current contribution
*/
double* p = _nt->nrn_fast_imem->nrn_sav_d;
- nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu) async(_nt->streams[_nt->stream_id]))
- nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
+ nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu)
+ async(_nt->streams[_nt->stream_id]))
+ nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)
+ depend(inout: _nt->streams[_nt->stream_id]) nowait)
for (int i = i1; i < i3; ++i) {
p[i] += vec_d[i];
}
