diff --git a/coreneuron/network/netcvode.cpp b/coreneuron/network/netcvode.cpp
index d7e743edd..60192e8af 100644
--- a/coreneuron/network/netcvode.cpp
+++ b/coreneuron/network/netcvode.cpp
@@ -533,8 +533,10 @@ void NetCvode::check_thresh(NrnThread* nt) {  // for default method
 
     nrn_pragma_acc(parallel loop present(
         nt [0:1], presyns_helper [0:nt->n_presyn], presyns [0:nt->n_presyn], actual_v [0:nt->end])
-                       copy(net_send_buf_count) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+                       copy(net_send_buf_count) if (nt->compute_gpu)
+                           async(nt->streams[nt->stream_id]))
+    nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count)
+                   if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int i = 0; i < nt->ncell; ++i) {
         PreSyn* ps = presyns + i;
         PreSynHelper* psh = presyns_helper + i;
@@ -569,9 +571,13 @@ void NetCvode::check_thresh(NrnThread* nt) {  // for default method
 #ifdef CORENEURON_ENABLE_GPU
         int* nsbuffer = nt->_net_send_buffer;
 #endif
-        nrn_pragma_acc(update host(nsbuffer [0:nt->_net_send_buffer_cnt]) async(nt->streams[nt->stream_id]))
+        nrn_pragma_acc(update host(nsbuffer [0:nt->_net_send_buffer_cnt])
+                           async(nt->streams[nt->stream_id]))
         nrn_pragma_acc(wait async(nt->streams[nt->stream_id]))
-        nrn_pragma_omp(target update from(nsbuffer [0:nt->_net_send_buffer_cnt]) depend(inout: nt->streams[nt->stream_id]) nowait)
+        // clang-format off
+        nrn_pragma_omp(target update from(nsbuffer [0:nt->_net_send_buffer_cnt])
+                           depend(inout: nt->streams[nt->stream_id]) nowait)
+        // clang-format on
         nrn_pragma_omp(taskwait)
     }
 
diff --git a/coreneuron/network/partrans.cpp b/coreneuron/network/partrans.cpp
index 066ca15bb..a39458f25 100644
--- a/coreneuron/network/partrans.cpp
+++ b/coreneuron/network/partrans.cpp
@@ -63,7 +63,10 @@ void nrnmpi_v_transfer() {
         }
         nrn_pragma_acc(update host(src_gather [0:n_src_gather]) if (nt->compute_gpu)
                            async(nt->streams[nt->stream_id]))
-        nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+        // clang-format off
+        nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu)
+                           depend(inout: nt->streams[nt->stream_id]) nowait)
+        // clang-format on
     }
 
     // copy gathered source values to outsrc_buf_
diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp
index e1ee3fd39..c9a93bcd9 100644
--- a/coreneuron/permute/cellorder.cpp
+++ b/coreneuron/permute/cellorder.cpp
@@ -600,14 +600,18 @@ void solve_interleaved2(int ith) {
     defined(_OPENACC)
         int nstride = stridedispl[nwarp];
 #endif
-        nrn_pragma_acc(parallel loop gang vector vector_length(
-            warpsize) present(nt [0:1],
-                              strides [0:nstride],
-                              ncycles [0:nwarp],
-                              stridedispl [0:nwarp + 1],
-                              rootbegin [0:nwarp + 1],
-                              nodebegin [0:nwarp + 1]) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
+        nrn_pragma_acc(parallel loop gang vector vector_length(warpsize)
+                           present(nt [0:1],
+                                   strides [0:nstride],
+                                   ncycles [0:nwarp],
+                                   stridedispl [0:nwarp + 1],
+                                   rootbegin [0:nwarp + 1],
+                                   nodebegin [0:nwarp + 1]) if (nt->compute_gpu)
+                               async(nt->streams[nt->stream_id]))
+        // clang-format off
+        nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)
+                           depend(inout: nt->streams[nt->stream_id]) nowait)
+        // clang-format on
         for (int icore = 0; icore < ncore; ++icore) {
             int iwarp = icore / warpsize;     // figure out the >> value
             int ic = icore & (warpsize - 1);  // figure out the & mask
diff --git a/coreneuron/sim/fadvance_core.cpp b/coreneuron/sim/fadvance_core.cpp
index 1d6ffdfcb..ad71f4c7a 100644
--- a/coreneuron/sim/fadvance_core.cpp
+++ b/coreneuron/sim/fadvance_core.cpp
@@ -317,7 +317,10 @@ void nrncore2nrn_send_values(NrnThread* nth) {
                 double* gather_i = tr->gather[i];
                 nrn_pragma_acc(update self(gather_i [0:1]) if (nth->compute_gpu)
                                    async(nth->streams[nth->stream_id]))
-                nrn_pragma_omp(target update from(gather_i [0:1]) if (nth->compute_gpu) depend(inout: nth->streams[nth->stream_id]) nowait)
+                // clang-format off
+                nrn_pragma_omp(target update from(gather_i [0:1]) if (nth->compute_gpu)
+                                   depend(inout: nth->streams[nth->stream_id]) nowait)
+                // clang-format on
             }
             nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
             for (int i = 0; i < tr->n_trajec; ++i) {
@@ -341,7 +344,8 @@ static void* nrn_fixed_step_thread(NrnThread* nth) {
     if (nth->ncell) {
         /*@todo: do we need to update nth->_t on GPU: Yes (Michael, but can
         launch kernel) */
-        nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
+        nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu)
+                           async(nth->streams[nth->stream_id]))
         nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
         nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
         fixed_play_continuous(nth);
@@ -377,7 +381,8 @@ void* nrn_fixed_step_lastpart(NrnThread* nth) {
 
     if (nth->ncell) {
         /*@todo: do we need to update nth->_t on GPU */
-        nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
+        nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu)
+                           async(nth->streams[nth->stream_id]))
         nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
         nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
         fixed_play_continuous(nth);
diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp
index 56f8d3af8..44189191d 100644
--- a/coreneuron/sim/multicore.hpp
+++ b/coreneuron/sim/multicore.hpp
@@ -130,10 +130,11 @@ struct NrnThread: public MemoryManaged {
 
     NrnThreadBAList* tbl[BEFORE_AFTER_SIZE]; /* wasteful since almost all empty */
 
-    int shadow_rhs_cnt = 0;    /* added to facilitate the NrnThread transfer to GPU */
-    int compute_gpu = 0;       /* define whether to compute with gpus */
-    int stream_id = 0;         /* define where the kernel will be launched on GPU stream */
-    std::vector<int> streams;  /* vector of stream ids needed for async execution of OpenMP in multiple streams */
+    int shadow_rhs_cnt = 0;   /* added to facilitate the NrnThread transfer to GPU */
+    int compute_gpu = 0;      /* define whether to compute with gpus */
+    int stream_id = 0;        /* index into `streams` selecting the stream used for async work */
+    std::vector<int> streams; /* vector of stream ids needed for async execution of OpenMP in
+                                 multiple streams */
     int _net_send_buffer_size = 0;
     int _net_send_buffer_cnt = 0;
     int* _net_send_buffer = nullptr;
diff --git a/coreneuron/sim/treeset_core.cpp b/coreneuron/sim/treeset_core.cpp
index 7f6f1d3af..42de967d0 100644
--- a/coreneuron/sim/treeset_core.cpp
+++ b/coreneuron/sim/treeset_core.cpp
@@ -152,8 +152,10 @@ static void nrn_lhs(NrnThread* _nt) {
            so here we transform so it only has membrane current contribution
         */
         double* p = _nt->nrn_fast_imem->nrn_sav_d;
-        nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu) async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
+        nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu)
+                           async(_nt->streams[_nt->stream_id]))
+        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)
+                           depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = i1; i < i3; ++i) {
             p[i] += vec_d[i];
         }