diff --git a/src/genn/backends/cuda/backend.cc b/src/genn/backends/cuda/backend.cc index 4ba6c1619a..51eab0bff3 100644 --- a/src/genn/backends/cuda/backend.cc +++ b/src/genn/backends/cuda/backend.cc @@ -931,7 +931,7 @@ void Backend::genInit(CodeStream &os, const ModelSpecMerged &modelMerged, // Sparse initialization kernel code size_t idSparseInitStart = 0; - if(!modelMerged.getMergedSynapseSparseInitGroups().empty()) { + if(!modelMerged.getMergedSynapseSparseInitGroups().empty() || !modelMerged.getMergedCustomWUUpdateSparseInitGroups().empty()) { os << "extern \"C\" __global__ void " << KernelNames[KernelInitializeSparse] << "()"; { CodeStream::Scope b(os); diff --git a/src/genn/backends/cuda/optimiser.cc b/src/genn/backends/cuda/optimiser.cc index d034d473bd..3729b146bc 100644 --- a/src/genn/backends/cuda/optimiser.cc +++ b/src/genn/backends/cuda/optimiser.cc @@ -163,6 +163,9 @@ void calcGroupSizes(const CUDA::Preferences &preferences, const ModelSpecInterna // Loop through custom updates, add size to vector of custom update groups and update group name to set for(const auto &c : model.getCustomUpdates()) { groupSizes[KernelCustomUpdate].push_back(c.second.isBatched() ? (model.getBatchSize() * c.second.getSize()) : c.second.getSize()); + if(c.second.isVarInitRequired()) { + groupSizes[KernelInitialize].push_back(c.second.getSize()); + } customUpdateKernels.insert(c.second.getUpdateGroupName()); } @@ -186,6 +189,12 @@ void calcGroupSizes(const CUDA::Preferences &preferences, const ModelSpecInterna groupSizes[KernelCustomUpdate].push_back(numCopies * sgInternal->getSrcNeuronGroup()->getNumNeurons() * sgInternal->getTrgNeuronGroup()->getNumNeurons()); } } + + if(c.second.isVarInitRequired()) { + if(sgInternal->getMatrixType() & SynapseMatrixConnectivity::SPARSE) { + groupSizes[KernelInitializeSparse].push_back(sgInternal->getMaxConnections()); + } + } } // Loop through synapse groups @@ -667,7 +676,7 @@ int chooseOptimalDevice(const ModelSpecInternal &model, KernelBlockSize &blockSi // Find ID of best device const int bestDeviceID = (int)std::distance(devices.cbegin(), bestDevice); - LOGI_BACKEND << "Optimal device " << bestDeviceID << " - total occupancy:" << std::get<1>(*bestDevice) << ", number of small models:" << std::get<2>(*bestDevice) << ", SM version:" << std::get<0>(*bestDevice); + LOGI_BACKEND << "Optimal device " << bestDeviceID << " - total occupancy:" << std::get<1>(*bestDevice) << ", number of small models:" << std::get<2>(*bestDevice) << ", SM version:" << std::get<0>(*bestDevice); // Get optimal block size from best device blockSize = std::get<3>(*bestDevice); diff --git a/src/genn/backends/opencl/backend.cc b/src/genn/backends/opencl/backend.cc index 17a9416322..9a2d968f46 100644 --- a/src/genn/backends/opencl/backend.cc +++ b/src/genn/backends/opencl/backend.cc @@ -1181,7 +1181,7 @@ void Backend::genInit(CodeStream &os, const ModelSpecMerged &modelMerged, // Generate sparse initialisation kernel size_t idSparseInitStart = 0; - if(!modelMerged.getMergedSynapseSparseInitGroups().empty()) { + if(!modelMerged.getMergedSynapseSparseInitGroups().empty() || !modelMerged.getMergedCustomWUUpdateSparseInitGroups().empty()) { initializeKernels << "__attribute__((reqd_work_group_size(" << getKernelBlockSize(KernelInitializeSparse) << ", 1, 1)))" << std::endl; initializeKernels << "__kernel void " << KernelNames[KernelInitializeSparse] << "("; const bool anyCustomWUUpdateSparseInitGroups = !modelMerged.getMergedCustomWUUpdateSparseInitGroups().empty();