This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

New allocators with sensible names and more #872

Draft · wants to merge 6 commits into base: master
Changes from all commits
21 changes: 1 addition & 20 deletions coreneuron/gpu/nrn_acc_manager.cpp
@@ -825,26 +825,7 @@ void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml) {
cnrn_target_delete(nrb->_nrb_index, nrb->_size);
}
#endif
// Reallocate host buffers using ecalloc_align (as in phase2.cpp) and
// free_memory (as in nrn_setup.cpp)
auto const realloc = [old_size = nrb->_size, nrb](auto*& ptr, std::size_t extra_size = 0) {
using T = std::remove_pointer_t<std::remove_reference_t<decltype(ptr)>>;
static_assert(std::is_trivial<T>::value,
"Only trivially constructible and copiable types are supported.");
static_assert(std::is_same<decltype(ptr), T*&>::value,
"ptr should be reference-to-pointer");
auto* const new_data = static_cast<T*>(ecalloc_align((nrb->_size + extra_size), sizeof(T)));
std::memcpy(new_data, ptr, (old_size + extra_size) * sizeof(T));
free_memory(ptr);
ptr = new_data;
};
nrb->_size *= 2;
realloc(nrb->_pnt_index);
realloc(nrb->_weight_index);
realloc(nrb->_nrb_t);
realloc(nrb->_nrb_flag);
realloc(nrb->_displ, 1);
realloc(nrb->_nrb_index);
nrb->grow();
#ifdef CORENEURON_ENABLE_GPU
if (nt->compute_gpu) {
// update device copy
10 changes: 1 addition & 9 deletions coreneuron/io/nrn_setup.cpp
@@ -764,15 +764,7 @@ void nrn_cleanup() {

NetReceiveBuffer_t* nrb = ml->_net_receive_buffer;
if (nrb) {
if (nrb->_size) {
free_memory(nrb->_pnt_index);
free_memory(nrb->_weight_index);
free_memory(nrb->_nrb_t);
free_memory(nrb->_nrb_flag);
free_memory(nrb->_displ);
free_memory(nrb->_nrb_index);
}
free_memory(nrb);
delete nrb;
ml->_net_receive_buffer = nullptr;
}

11 changes: 2 additions & 9 deletions coreneuron/io/phase2.cpp
@@ -479,20 +479,13 @@ void Phase2::set_net_send_buffer(Memb_list** ml_list, const std::vector<int>& pn
// Does this thread have this type.
Memb_list* ml = ml_list[type];
if (ml) { // needs a NetReceiveBuffer
NetReceiveBuffer_t* nrb =
(NetReceiveBuffer_t*) ecalloc_align(1, sizeof(NetReceiveBuffer_t));
NetReceiveBuffer_t* nrb = new NetReceiveBuffer_t();
assert(!ml->_net_receive_buffer);
ml->_net_receive_buffer = nrb;
nrb->_pnt_offset = pnt_offset[type];

// begin with a size equal to the number of instances, or at least 8
nrb->_size = std::max(8, ml->nodecount);
nrb->_pnt_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_displ = (int*) ecalloc_align(nrb->_size + 1, sizeof(int));
nrb->_nrb_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_weight_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_nrb_t = (double*) ecalloc_align(nrb->_size, sizeof(double));
nrb->_nrb_flag = (double*) ecalloc_align(nrb->_size, sizeof(double));
nrb->initialize(std::max(8, ml->nodecount));
}
}

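Taken together with the mechanism.hpp hunk below, these changes reduce the NetReceiveBuffer_t lifecycle to plain construction, initialize(), grow() and delete. A minimal sketch of that lifecycle (not code from this PR; the include path is an assumption and the member functions are the ones shown further down):

#include "coreneuron/mechanism/mechanism.hpp"  // assumed header location for NetReceiveBuffer_t

void net_receive_buffer_lifecycle_sketch() {
    auto* nrb = new NetReceiveBuffer_t();  // presumably placed in unified memory via the UnifiedMemManaged base
    nrb->initialize(8);                    // allocates the six per-event arrays (replaces six ecalloc_align calls)
    nrb->grow();                           // doubles the capacity, preserving contents (replaces the realloc lambda)
    delete nrb;                            // unique_ptr members release the arrays (replaces per-member free_memory)
}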
121 changes: 60 additions & 61 deletions coreneuron/mechanism/mechanism.hpp
@@ -38,51 +38,74 @@ struct Point_process {
short _tid; /* NrnThread id */
};

struct NetReceiveBuffer_t {
int* _displ; /* _displ_cnt + 1 of these */
int* _nrb_index; /* _cnt of these (order of increasing _pnt_index) */

int* _pnt_index;
int* _weight_index;
double* _nrb_t;
double* _nrb_flag;
int _cnt;
int _displ_cnt; /* number of unique _pnt_index */
int _size; /* capacity */
int _pnt_offset;
size_t size_of_object() {
size_t nbytes = 0;
struct NetReceiveBuffer_t: public UnifiedMemManaged<> {
unified_uniq_ptr<int[]> _displ; /* _displ_cnt + 1 of these */
unified_uniq_ptr<int[]> _nrb_index; /* _cnt of these (order of increasing _pnt_index) */

unified_uniq_ptr<int[]> _pnt_index;
unified_uniq_ptr<int[]> _weight_index;
unified_uniq_ptr<double[]> _nrb_t;
unified_uniq_ptr<double[]> _nrb_flag;
int _cnt = 0;
int _displ_cnt = 0; /* number of unique _pnt_index */

std::size_t _size = 0; /* capacity */
int _pnt_offset = 0;
std::size_t size_of_object() {
std::size_t nbytes = 0;
nbytes += _size * sizeof(int) * 3;
nbytes += (_size + 1) * sizeof(int);
nbytes += _size * sizeof(double) * 2;
return nbytes;
}

void initialize(std::size_t size) {
_size = size;
_pnt_index = allocate_unique<int[]>(allocator<int>{}, _size);
auto displ_size = _size + 1;
_displ = allocate_unique<int[]>(allocator<int>{}, displ_size);
_nrb_index = allocate_unique<int[]>(allocator<int>{}, _size);
_weight_index = allocate_unique<int[]>(allocator<int>{}, _size);
_nrb_t = allocate_unique<double[]>(allocator<double>{}, _size);
_nrb_flag = allocate_unique<double[]>(allocator<double>{}, _size);
}

void grow() {
std::size_t new_size = _size * 2;
grow_buf(_pnt_index, _size, new_size);
grow_buf(_weight_index, _size, new_size);
grow_buf(_nrb_t, _size, new_size);
grow_buf(_nrb_flag, _size, new_size);
grow_buf(_displ, _size + 1, new_size + 1);
grow_buf(_nrb_index, _size, new_size);
_size = new_size;
}
};

struct NetSendBuffer_t: MemoryManaged {
int* _sendtype; // net_send, net_event, net_move
int* _vdata_index;
int* _pnt_index;
int* _weight_index;
double* _nsb_t;
double* _nsb_flag;
int _cnt;
int _size; /* capacity */
int reallocated; /* if buffer resized/reallocated, needs to be copy to cpu */
struct NetSendBuffer_t: public UnifiedMemManaged<> {
unified_uniq_ptr<int[]> _sendtype; // net_send, net_event, net_move
unified_uniq_ptr<int[]> _vdata_index;
unified_uniq_ptr<int[]> _pnt_index;
unified_uniq_ptr<int[]> _weight_index;
unified_uniq_ptr<double[]> _nsb_t;
unified_uniq_ptr<double[]> _nsb_flag;
int _cnt = 0;
std::size_t _size = 0; /* capacity */
int reallocated = 0; /* if the buffer was resized/reallocated it needs to be copied to the cpu */

NetSendBuffer_t(int size)
: _size(size) {
_cnt = 0;

_sendtype = (int*) ecalloc_align(_size, sizeof(int));
_vdata_index = (int*) ecalloc_align(_size, sizeof(int));
_pnt_index = (int*) ecalloc_align(_size, sizeof(int));
_weight_index = (int*) ecalloc_align(_size, sizeof(int));
_sendtype = allocate_unique<int[]>(allocator<int>{}, _size);
_vdata_index = allocate_unique<int[]>(allocator<int>{}, _size);
_pnt_index = allocate_unique<int[]>(allocator<int>{}, _size);
_weight_index = allocate_unique<int[]>(allocator<int>{}, _size);
// when == 1, this NetSendBuffer_t is newly allocated (i.e. we need to free the
// previous copy and re-copy the new data)
reallocated = 1;
_nsb_t = (double*) ecalloc_align(_size, sizeof(double));
_nsb_flag = (double*) ecalloc_align(_size, sizeof(double));
_nsb_t = allocate_unique<double[]>(allocator<double>{}, _size);
_nsb_flag = allocate_unique<double[]>(allocator<double>{}, _size);
}

size_t size_of_object() {
@@ -92,39 +115,15 @@ struct NetSendBuffer_t: MemoryManaged {
return nbytes;
}

~NetSendBuffer_t() {
free_memory(_sendtype);
free_memory(_vdata_index);
free_memory(_pnt_index);
free_memory(_weight_index);
free_memory(_nsb_t);
free_memory(_nsb_flag);
}

void grow() {
#ifdef CORENEURON_ENABLE_GPU
int cannot_reallocate_on_device = 0;
assert(cannot_reallocate_on_device);
#else
int new_size = _size * 2;
grow_buf(&_sendtype, _size, new_size);
grow_buf(&_vdata_index, _size, new_size);
grow_buf(&_pnt_index, _size, new_size);
grow_buf(&_weight_index, _size, new_size);
grow_buf(&_nsb_t, _size, new_size);
grow_buf(&_nsb_flag, _size, new_size);
std::size_t new_size = _size * 2;
grow_buf(_sendtype, _size, new_size);
grow_buf(_vdata_index, _size, new_size);
grow_buf(_pnt_index, _size, new_size);
grow_buf(_weight_index, _size, new_size);
grow_buf(_nsb_t, _size, new_size);
grow_buf(_nsb_flag, _size, new_size);
_size = new_size;
#endif
}

private:
template <typename T>
void grow_buf(T** buf, int size, int new_size) {
T* new_buf = nullptr;
new_buf = (T*) ecalloc_align(new_size, sizeof(T));
memcpy(new_buf, *buf, size * sizeof(T));
free(*buf);
*buf = new_buf;
}
};

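The helper names used above, UnifiedMemManaged, unified_uniq_ptr, allocator, allocate_unique, allocate_unified and grow_buf, are introduced elsewhere in this PR and their definitions do not appear in this diff. A rough, host-only sketch of how they could fit together, with plain calloc/free standing in for CUDA unified memory; every definition here is an assumption for illustration, not the PR's actual implementation:

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>

// Raw unified allocation; a GPU build would presumably call cudaMallocManaged here.
inline void* allocate_unified(std::size_t nbytes) {
    void* p = std::calloc(nbytes, 1);
    if (!p) {
        throw std::bad_alloc{};
    }
    return p;
}

inline void deallocate_unified(void* p, std::size_t /* nbytes */) {
    std::free(p);
}

// Minimal std-style allocator routed through allocate_unified.
template <typename T>
struct allocator {
    using value_type = T;
    T* allocate(std::size_t n) {
        return static_cast<T*>(allocate_unified(n * sizeof(T)));
    }
    void deallocate(T* p, std::size_t n) {
        deallocate_unified(p, n * sizeof(T));
    }
};

// Deleter that remembers the allocation size so the unique_ptr can release it.
struct unified_deleter {
    std::size_t nbytes{};
    void operator()(void* p) const {
        deallocate_unified(p, nbytes);
    }
};

template <typename T>
using unified_uniq_ptr = std::unique_ptr<T, unified_deleter>;

// Array factory matching the call sites allocate_unique<int[]>(allocator<int>{}, n).
template <typename Array, typename T = std::remove_extent_t<Array>>
unified_uniq_ptr<Array> allocate_unique(allocator<T> alloc, std::size_t n) {
    return unified_uniq_ptr<Array>{alloc.allocate(n), unified_deleter{n * sizeof(T)}};
}

// Allocate a larger buffer, copy the old contents, release the old allocation;
// matches the call sites grow_buf(_pnt_index, _size, new_size).
template <typename T>
void grow_buf(unified_uniq_ptr<T[]>& buf, std::size_t old_size, std::size_t new_size) {
    auto bigger = allocate_unique<T[]>(allocator<T>{}, new_size);
    std::memcpy(bigger.get(), buf.get(), old_size * sizeof(T));
    buf = std::move(bigger);
}

// Base class whose operator new/delete place the whole object in unified memory,
// so `new NetReceiveBuffer_t()` would be device-visible when GPU support is enabled.
template <typename = void>
struct UnifiedMemManaged {
    static void* operator new(std::size_t nbytes) {
        return allocate_unified(nbytes);
    }
    static void operator delete(void* p, std::size_t nbytes) {
        deallocate_unified(p, nbytes);
    }
};

With ownership expressed through unified_uniq_ptr members, the explicit per-member free_memory calls become unnecessary, which is why the nrn_setup.cpp hunk above and the cellorder.cpp hunk below can simply delete the hand-written cleanup code.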
38 changes: 12 additions & 26 deletions coreneuron/permute/cellorder.cpp
@@ -50,11 +50,11 @@ InterleaveInfo::InterleaveInfo(const InterleaveInfo& info) {
nwarp = info.nwarp;
nstride = info.nstride;

copy_align_array(stridedispl, info.stridedispl, nwarp + 1);
copy_align_array(stride, info.stride, nstride);
copy_align_array(firstnode, info.firstnode, nwarp + 1);
copy_align_array(lastnode, info.lastnode, nwarp + 1);
copy_align_array(cellsize, info.cellsize, nwarp);
copy_array(stridedispl, info.stridedispl, nwarp + 1);
copy_array(stride, info.stride, nstride);
copy_array(firstnode, info.firstnode, nwarp + 1);
copy_array(lastnode, info.lastnode, nwarp + 1);
copy_array(cellsize, info.cellsize, nwarp);

copy_array(nnode, info.nnode, nwarp);
copy_array(ncycle, info.ncycle, nwarp);
@@ -74,25 +74,6 @@ InterleaveInfo& InterleaveInfo::operator=(const InterleaveInfo& info) {
return *this;
}

InterleaveInfo::~InterleaveInfo() {
if (stride) {
free_memory(stride);
free_memory(firstnode);
free_memory(lastnode);
free_memory(cellsize);
}
if (stridedispl) {
free_memory(stridedispl);
}
if (idle) {
delete[] nnode;
delete[] ncycle;
delete[] idle;
delete[] cache_access;
delete[] child_race;
}
}

void create_interleave_info() {
destroy_interleave_info();
interleave_info = new InterleaveInfo[nrn_nthread];
@@ -299,8 +280,13 @@ int* interleave_order(int ith, int ncell, int nnode, int* parent) {
}
}

int nwarp = 0, nstride = 0, *stride = nullptr, *firstnode = nullptr;
int *lastnode = nullptr, *cellsize = nullptr, *stridedispl = nullptr;
int nwarp = 0;
int nstride = 0;
int* stride = nullptr;
int* firstnode = nullptr;
int* lastnode = nullptr;
int* cellsize = nullptr;
int* stridedispl = nullptr;

int* order = node_order(
ncell, nnode, parent, nwarp, nstride, stride, firstnode, lastnode, cellsize, stridedispl);
14 changes: 3 additions & 11 deletions coreneuron/permute/cellorder.hpp
@@ -47,12 +47,11 @@ class InterleaveInfo; // forward declaration
*/
void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);

class InterleaveInfo: public MemoryManaged {
class InterleaveInfo: public UnifiedMemManaged<> {
public:
InterleaveInfo() = default;
InterleaveInfo(const InterleaveInfo&);
InterleaveInfo& operator=(const InterleaveInfo&);
~InterleaveInfo();
int nwarp = 0; // used only by interleave2
int nstride = 0;
int* stridedispl = nullptr; // interleave2: nwarp+1
@@ -106,17 +105,10 @@ int* node_order(int ncell,
int*& cellsize,
int*& stridedispl);

// copy src array to dest with new allocation
template <typename T>
void copy_array(T*& dest, T* src, size_t n) {
dest = new T[n];
std::copy(src, src + n, dest);
}

// copy src array to dest with NRN_SOA_BYTE_ALIGN ecalloc_align allocation
template <typename T>
void copy_align_array(T*& dest, T* src, size_t n) {
dest = static_cast<T*>(ecalloc_align(n, sizeof(T)));
void copy_array(T*& dest, T* src, size_t n) {
dest = static_cast<T*>(allocate_unified(n * sizeof(T)));
std::copy(src, src + n, dest);
}

4 changes: 3 additions & 1 deletion coreneuron/sim/multicore.hpp
@@ -36,6 +36,8 @@ struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */
int* dependencies; /* list of mechanism types that this mechanism depends on*/
int ndependencies; /* for scheduling we need to know the dependency count */
};


NrnThreadMembList* create_tml(NrnThread& nt,
int mech_id,
Memb_func& memb_func,
@@ -72,7 +74,7 @@ struct PreSynHelper {
int flag_;
};

struct NrnThread: public MemoryManaged {
struct NrnThread: UnifiedMemManaged<> {
double _t = 0;
double _dt = -1e9;
double cj = 0.0;