This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

New allocators with sensible names and more #872

Draft · wants to merge 6 commits into base: master
Changes from all commits
21 changes: 1 addition & 20 deletions coreneuron/gpu/nrn_acc_manager.cpp
@@ -825,26 +825,7 @@ void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml) {
cnrn_target_delete(nrb->_nrb_index, nrb->_size);
}
#endif
// Reallocate host buffers using ecalloc_align (as in phase2.cpp) and
// free_memory (as in nrn_setup.cpp)
auto const realloc = [old_size = nrb->_size, nrb](auto*& ptr, std::size_t extra_size = 0) {
using T = std::remove_pointer_t<std::remove_reference_t<decltype(ptr)>>;
static_assert(std::is_trivial<T>::value,
"Only trivially constructible and copiable types are supported.");
static_assert(std::is_same<decltype(ptr), T*&>::value,
"ptr should be reference-to-pointer");
auto* const new_data = static_cast<T*>(ecalloc_align((nrb->_size + extra_size), sizeof(T)));
std::memcpy(new_data, ptr, (old_size + extra_size) * sizeof(T));
free_memory(ptr);
ptr = new_data;
};
nrb->_size *= 2;
realloc(nrb->_pnt_index);
realloc(nrb->_weight_index);
realloc(nrb->_nrb_t);
realloc(nrb->_nrb_flag);
realloc(nrb->_displ, 1);
realloc(nrb->_nrb_index);
nrb->grow();
#ifdef CORENEURON_ENABLE_GPU
if (nt->compute_gpu) {
// update device copy
10 changes: 1 addition & 9 deletions coreneuron/io/nrn_setup.cpp
@@ -764,15 +764,7 @@ void nrn_cleanup() {

NetReceiveBuffer_t* nrb = ml->_net_receive_buffer;
if (nrb) {
if (nrb->_size) {
free_memory(nrb->_pnt_index);
free_memory(nrb->_weight_index);
free_memory(nrb->_nrb_t);
free_memory(nrb->_nrb_flag);
free_memory(nrb->_displ);
free_memory(nrb->_nrb_index);
}
free_memory(nrb);
delete nrb;
ml->_net_receive_buffer = nullptr;
}

11 changes: 2 additions & 9 deletions coreneuron/io/phase2.cpp
@@ -479,20 +479,13 @@ void Phase2::set_net_send_buffer(Memb_list** ml_list, const std::vector<int>& pn
// Does this thread have this type.
Memb_list* ml = ml_list[type];
if (ml) { // needs a NetReceiveBuffer
NetReceiveBuffer_t* nrb =
(NetReceiveBuffer_t*) ecalloc_align(1, sizeof(NetReceiveBuffer_t));
NetReceiveBuffer_t* nrb = new NetReceiveBuffer_t();
assert(!ml->_net_receive_buffer);
ml->_net_receive_buffer = nrb;
nrb->_pnt_offset = pnt_offset[type];

// begin with a size equal to the number of instances, or at least 8
nrb->_size = std::max(8, ml->nodecount);
nrb->_pnt_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_displ = (int*) ecalloc_align(nrb->_size + 1, sizeof(int));
nrb->_nrb_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_weight_index = (int*) ecalloc_align(nrb->_size, sizeof(int));
nrb->_nrb_t = (double*) ecalloc_align(nrb->_size, sizeof(double));
nrb->_nrb_flag = (double*) ecalloc_align(nrb->_size, sizeof(double));
nrb->initialize(std::max(8, ml->nodecount));
}
}

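Taken together with the mechanism.hpp hunk below, these changes reduce the NetReceiveBuffer_t lifecycle to plain construction, initialize(), grow() and delete. A minimal sketch of that lifecycle (not code from this PR; the include path is an assumption and the member functions are the ones shown further down):

#include "coreneuron/mechanism/mechanism.hpp"  // assumed header location for NetReceiveBuffer_t

void net_receive_buffer_lifecycle_sketch() {
    auto* nrb = new NetReceiveBuffer_t();  // presumably placed in unified memory via the UnifiedMemManaged base
    nrb->initialize(8);                    // allocates the six per-event arrays (replaces six ecalloc_align calls)
    nrb->grow();                           // doubles the capacity, preserving contents (replaces the realloc lambda)
    delete nrb;                            // unique_ptr members release the arrays (replaces per-member free_memory)
}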
121 changes: 60 additions & 61 deletions coreneuron/mechanism/mechanism.hpp
@@ -38,51 +38,74 @@ struct Point_process {
short _tid; /* NrnThread id */
};

struct NetReceiveBuffer_t {
int* _displ; /* _displ_cnt + 1 of these */
int* _nrb_index; /* _cnt of these (order of increasing _pnt_index) */

int* _pnt_index;
int* _weight_index;
double* _nrb_t;
double* _nrb_flag;
int _cnt;
int _displ_cnt; /* number of unique _pnt_index */
int _size; /* capacity */
int _pnt_offset;
size_t size_of_object() {
size_t nbytes = 0;
struct NetReceiveBuffer_t: public UnifiedMemManaged<> {
unified_uniq_ptr<int[]> _displ; /* _displ_cnt + 1 of these */
unified_uniq_ptr<int[]> _nrb_index; /* _cnt of these (order of increasing _pnt_index) */

unified_uniq_ptr<int[]> _pnt_index;
unified_uniq_ptr<int[]> _weight_index;
unified_uniq_ptr<double[]> _nrb_t;
unified_uniq_ptr<double[]> _nrb_flag;
int _cnt = 0;
int _displ_cnt = 0; /* number of unique _pnt_index */

std::size_t _size = 0; /* capacity */
int _pnt_offset = 0;
std::size_t size_of_object() {
std::size_t nbytes = 0;
nbytes += _size * sizeof(int) * 3;
nbytes += (_size + 1) * sizeof(int);
nbytes += _size * sizeof(double) * 2;
return nbytes;
}

void initialize(std::size_t size) {
_size = size;
_pnt_index = allocate_unique<int[]>(allocator<int>{}, _size);
auto displ_size = _size + 1;
_displ = allocate_unique<int[]>(allocator<int>{}, displ_size);
_nrb_index = allocate_unique<int[]>(allocator<int>{}, _size);
_weight_index = allocate_unique<int[]>(allocator<int>{}, _size);
_nrb_t = allocate_unique<double[]>(allocator<double>{}, _size);
_nrb_flag = allocate_unique<double[]>(allocator<double>{}, _size);
}

void grow() {
std::size_t new_size = _size * 2;
grow_buf(_pnt_index, _size, new_size);
grow_buf(_weight_index, _size, new_size);
grow_buf(_nrb_t, _size, new_size);
grow_buf(_nrb_flag, _size, new_size);
grow_buf(_displ, _size + 1, new_size + 1);
grow_buf(_nrb_index, _size, new_size);
_size = new_size;
}
};

struct NetSendBuffer_t: MemoryManaged {
int* _sendtype; // net_send, net_event, net_move
int* _vdata_index;
int* _pnt_index;
int* _weight_index;
double* _nsb_t;
double* _nsb_flag;
int _cnt;
int _size; /* capacity */
int reallocated; /* if buffer resized/reallocated, needs to be copy to cpu */
struct NetSendBuffer_t: public UnifiedMemManaged<> {
unified_uniq_ptr<int[]> _sendtype; // net_send, net_event, net_move
unified_uniq_ptr<int[]> _vdata_index;
unified_uniq_ptr<int[]> _pnt_index;
unified_uniq_ptr<int[]> _weight_index;
unified_uniq_ptr<double[]> _nsb_t;
unified_uniq_ptr<double[]> _nsb_flag;
int _cnt = 0;
std::size_t _size = 0; /* capacity */
int reallocated = 0; /* if the buffer was resized/reallocated it needs to be copied to the cpu */

NetSendBuffer_t(int size)
: _size(size) {
_cnt = 0;

_sendtype = (int*) ecalloc_align(_size, sizeof(int));
_vdata_index = (int*) ecalloc_align(_size, sizeof(int));
_pnt_index = (int*) ecalloc_align(_size, sizeof(int));
_weight_index = (int*) ecalloc_align(_size, sizeof(int));
_sendtype = allocate_unique<int[]>(allocator<int>{}, _size);
_vdata_index = allocate_unique<int[]>(allocator<int>{}, _size);
_pnt_index = allocate_unique<int[]>(allocator<int>{}, _size);
_weight_index = allocate_unique<int[]>(allocator<int>{}, _size);
// when == 1, this NetSendBuffer_t is newly allocated (i.e. we need to free the
// previous copy and re-copy the new data)
reallocated = 1;
_nsb_t = (double*) ecalloc_align(_size, sizeof(double));
_nsb_flag = (double*) ecalloc_align(_size, sizeof(double));
_nsb_t = allocate_unique<double[]>(allocator<double>{}, _size);
_nsb_flag = allocate_unique<double[]>(allocator<double>{}, _size);
}

size_t size_of_object() {
@@ -92,39 +115,15 @@ struct NetSendBuffer_t: MemoryManaged {
return nbytes;
}

~NetSendBuffer_t() {
free_memory(_sendtype);
free_memory(_vdata_index);
free_memory(_pnt_index);
free_memory(_weight_index);
free_memory(_nsb_t);
free_memory(_nsb_flag);
}

void grow() {
#ifdef CORENEURON_ENABLE_GPU
int cannot_reallocate_on_device = 0;
assert(cannot_reallocate_on_device);
#else
int new_size = _size * 2;
grow_buf(&_sendtype, _size, new_size);
grow_buf(&_vdata_index, _size, new_size);
grow_buf(&_pnt_index, _size, new_size);
grow_buf(&_weight_index, _size, new_size);
grow_buf(&_nsb_t, _size, new_size);
grow_buf(&_nsb_flag, _size, new_size);
std::size_t new_size = _size * 2;
grow_buf(_sendtype, _size, new_size);
grow_buf(_vdata_index, _size, new_size);
grow_buf(_pnt_index, _size, new_size);
grow_buf(_weight_index, _size, new_size);
grow_buf(_nsb_t, _size, new_size);
grow_buf(_nsb_flag, _size, new_size);
_size = new_size;
#endif
}

private:
template <typename T>
void grow_buf(T** buf, int size, int new_size) {
T* new_buf = nullptr;
new_buf = (T*) ecalloc_align(new_size, sizeof(T));
memcpy(new_buf, *buf, size * sizeof(T));
free(*buf);
*buf = new_buf;
}
};

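The helper names used above, UnifiedMemManaged, unified_uniq_ptr, allocator, allocate_unique, allocate_unified and grow_buf, are introduced elsewhere in this PR and their definitions do not appear in this diff. A rough, host-only sketch of how they could fit together, with plain calloc/free standing in for CUDA unified memory; every definition here is an assumption for illustration, not the PR's actual implementation:

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>

// Raw unified allocation; a GPU build would presumably call cudaMallocManaged here.
inline void* allocate_unified(std::size_t nbytes) {
    void* p = std::calloc(nbytes, 1);
    if (!p) {
        throw std::bad_alloc{};
    }
    return p;
}

inline void deallocate_unified(void* p, std::size_t /* nbytes */) {
    std::free(p);
}

// Minimal std-style allocator routed through allocate_unified.
template <typename T>
struct allocator {
    using value_type = T;
    T* allocate(std::size_t n) {
        return static_cast<T*>(allocate_unified(n * sizeof(T)));
    }
    void deallocate(T* p, std::size_t n) {
        deallocate_unified(p, n * sizeof(T));
    }
};

// Deleter that remembers the allocation size so the unique_ptr can release it.
struct unified_deleter {
    std::size_t nbytes{};
    void operator()(void* p) const {
        deallocate_unified(p, nbytes);
    }
};

template <typename T>
using unified_uniq_ptr = std::unique_ptr<T, unified_deleter>;

// Array factory matching the call sites allocate_unique<int[]>(allocator<int>{}, n).
template <typename Array, typename T = std::remove_extent_t<Array>>
unified_uniq_ptr<Array> allocate_unique(allocator<T> alloc, std::size_t n) {
    return unified_uniq_ptr<Array>{alloc.allocate(n), unified_deleter{n * sizeof(T)}};
}

// Allocate a larger buffer, copy the old contents, release the old allocation;
// matches the call sites grow_buf(_pnt_index, _size, new_size).
template <typename T>
void grow_buf(unified_uniq_ptr<T[]>& buf, std::size_t old_size, std::size_t new_size) {
    auto bigger = allocate_unique<T[]>(allocator<T>{}, new_size);
    std::memcpy(bigger.get(), buf.get(), old_size * sizeof(T));
    buf = std::move(bigger);
}

// Base class whose operator new/delete place the whole object in unified memory,
// so `new NetReceiveBuffer_t()` would be device-visible when GPU support is enabled.
template <typename = void>
struct UnifiedMemManaged {
    static void* operator new(std::size_t nbytes) {
        return allocate_unified(nbytes);
    }
    static void operator delete(void* p, std::size_t nbytes) {
        deallocate_unified(p, nbytes);
    }
};

With ownership expressed through unified_uniq_ptr members, the explicit per-member free_memory calls become unnecessary, which is why the nrn_setup.cpp hunk above and the cellorder.cpp hunk below can simply delete the hand-written cleanup code.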
38 changes: 12 additions & 26 deletions coreneuron/permute/cellorder.cpp
@@ -50,11 +50,11 @@ InterleaveInfo::InterleaveInfo(const InterleaveInfo& info) {
nwarp = info.nwarp;
nstride = info.nstride;

copy_align_array(stridedispl, info.stridedispl, nwarp + 1);
copy_align_array(stride, info.stride, nstride);
copy_align_array(firstnode, info.firstnode, nwarp + 1);
copy_align_array(lastnode, info.lastnode, nwarp + 1);
copy_align_array(cellsize, info.cellsize, nwarp);
copy_array(stridedispl, info.stridedispl, nwarp + 1);
copy_array(stride, info.stride, nstride);
copy_array(firstnode, info.firstnode, nwarp + 1);
copy_array(lastnode, info.lastnode, nwarp + 1);
copy_array(cellsize, info.cellsize, nwarp);

copy_array(nnode, info.nnode, nwarp);
copy_array(ncycle, info.ncycle, nwarp);
@@ -74,25 +74,6 @@ InterleaveInfo& InterleaveInfo::operator=(const InterleaveInfo& info) {
return *this;
}

InterleaveInfo::~InterleaveInfo() {
if (stride) {
free_memory(stride);
free_memory(firstnode);
free_memory(lastnode);
free_memory(cellsize);
}
if (stridedispl) {
free_memory(stridedispl);
}
if (idle) {
delete[] nnode;
delete[] ncycle;
delete[] idle;
delete[] cache_access;
delete[] child_race;
}
}

void create_interleave_info() {
destroy_interleave_info();
interleave_info = new InterleaveInfo[nrn_nthread];
@@ -299,8 +280,13 @@ int* interleave_order(int ith, int ncell, int nnode, int* parent) {
}
}

int nwarp = 0, nstride = 0, *stride = nullptr, *firstnode = nullptr;
int *lastnode = nullptr, *cellsize = nullptr, *stridedispl = nullptr;
int nwarp = 0;
int nstride = 0;
int* stride = nullptr;
int* firstnode = nullptr;
int* lastnode = nullptr;
int* cellsize = nullptr;
int* stridedispl = nullptr;

int* order = node_order(
ncell, nnode, parent, nwarp, nstride, stride, firstnode, lastnode, cellsize, stridedispl);
14 changes: 3 additions & 11 deletions coreneuron/permute/cellorder.hpp
@@ -47,12 +47,11 @@ class InterleaveInfo; // forward declaration
*/
void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);

class InterleaveInfo: public MemoryManaged {
class InterleaveInfo: public UnifiedMemManaged<> {
public:
InterleaveInfo() = default;
InterleaveInfo(const InterleaveInfo&);
InterleaveInfo& operator=(const InterleaveInfo&);
~InterleaveInfo();
int nwarp = 0; // used only by interleave2
int nstride = 0;
int* stridedispl = nullptr; // interleave2: nwarp+1
@@ -106,17 +105,10 @@ int* node_order(int ncell,
int*& cellsize,
int*& stridedispl);

// copy src array to dest with new allocation
template <typename T>
void copy_array(T*& dest, T* src, size_t n) {
dest = new T[n];
std::copy(src, src + n, dest);
}

// copy src array to dest with NRN_SOA_BYTE_ALIGN ecalloc_align allocation
template <typename T>
void copy_align_array(T*& dest, T* src, size_t n) {
dest = static_cast<T*>(ecalloc_align(n, sizeof(T)));
void copy_array(T*& dest, T* src, size_t n) {
dest = static_cast<T*>(allocate_unified(n * sizeof(T)));
std::copy(src, src + n, dest);
}

4 changes: 3 additions & 1 deletion coreneuron/sim/multicore.hpp
@@ -36,6 +36,8 @@ struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */
int* dependencies; /* list of mechanism types that this mechanism depends on*/
int ndependencies; /* for scheduling we need to know the dependency count */
};


NrnThreadMembList* create_tml(NrnThread& nt,
int mech_id,
Memb_func& memb_func,
@@ -72,7 +74,7 @@ struct PreSynHelper {
int flag_;
};

struct NrnThread: public MemoryManaged {
struct NrnThread: UnifiedMemManaged<> {
double _t = 0;
double _dt = -1e9;
double cj = 0.0;