-
Notifications
You must be signed in to change notification settings - Fork 110
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #240 from the-virtual-brain/dsl-cuda
Add CUDA support for the DSL and executing parameter sweeps
- Loading branch information
Showing
46 changed files
with
10,272 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
#include <stdio.h> // for printf | ||
#define PI_2 (2 * M_PI_F) | ||
|
||
// buffer length defaults to the argument to the integrate kernel | ||
// but if it's known at compile time, it can be provided which allows | ||
// compiler to change i%n to i&(n-1) if n is a power of two. | ||
#ifndef NH | ||
#define NH nh | ||
#endif | ||
|
||
#ifndef WARP_SIZE | ||
#define WARP_SIZE 32 | ||
#endif | ||
|
||
#include <curand_kernel.h> | ||
#include <curand.h> | ||
#include <stdbool.h> | ||
|
||
__device__ float wrap_it_PI(float x) | ||
{ | ||
bool neg_mask = x < 0.0f; | ||
bool pos_mask = !neg_mask; | ||
// fmodf diverges 51% of time | ||
float pos_val = fmodf(x, PI_2); | ||
float neg_val = PI_2 - fmodf(-x, PI_2); | ||
return neg_mask * neg_val + pos_mask * pos_val; | ||
} | ||
__device__ float wrap_it_x1(float x1) | ||
{ | ||
float x1dim[] = {-2.0, 1.0}; | ||
if (x1 < x1dim[0]) x1 = x1dim[0]; | ||
else if (x1 > x1dim[1]) x1 = x1dim[1]; | ||
|
||
return x1; | ||
} | ||
__device__ float wrap_it_y1(float y1) | ||
{ | ||
float y1dim[] = {-20.0, 2.0}; | ||
if (y1 < y1dim[0]) y1 = y1dim[0]; | ||
else if (y1 > y1dim[1]) y1 = y1dim[1]; | ||
|
||
return y1; | ||
} | ||
__device__ float wrap_it_z(float z) | ||
{ | ||
float zdim[] = {-2.0, 5.0}; | ||
if (z < zdim[0]) z = zdim[0]; | ||
else if (z > zdim[1]) z = zdim[1]; | ||
|
||
return z; | ||
} | ||
__device__ float wrap_it_x2(float x2) | ||
{ | ||
float x2dim[] = {-2.0, 0.0}; | ||
if (x2 < x2dim[0]) x2 = x2dim[0]; | ||
else if (x2 > x2dim[1]) x2 = x2dim[1]; | ||
|
||
return x2; | ||
} | ||
__device__ float wrap_it_y2(float y2) | ||
{ | ||
float y2dim[] = {0.0, 2.0}; | ||
if (y2 < y2dim[0]) y2 = y2dim[0]; | ||
else if (y2 > y2dim[1]) y2 = y2dim[1]; | ||
|
||
return y2; | ||
} | ||
__device__ float wrap_it_g(float g) | ||
{ | ||
float gdim[] = {-1.0, 1.0}; | ||
if (g < gdim[0]) g = gdim[0]; | ||
else if (g > gdim[1]) g = gdim[1]; | ||
|
||
return g; | ||
} | ||
|
||
__global__ void Epileptor( | ||
|
||
// config | ||
unsigned int i_step, unsigned int n_node, unsigned int nh, unsigned int n_step, unsigned int n_params, | ||
float dt, float speed, float * __restrict__ weights, float * __restrict__ lengths, | ||
float * __restrict__ params_pwi, // pwi: per work item | ||
// state | ||
float * __restrict__ state_pwi, | ||
// outputs | ||
float * __restrict__ tavg_pwi | ||
) | ||
{ | ||
// work id & size | ||
const unsigned int id = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; | ||
const unsigned int size = blockDim.x * gridDim.x * gridDim.y; | ||
|
||
#define params(i_par) (params_pwi[(size * (i_par)) + id]) | ||
#define state(time, i_node) (state_pwi[((time) * 6 * n_node + (i_node))*size + id]) | ||
#define tavg(i_node) (tavg_pwi[((i_node) * size) + id]) | ||
|
||
// unpack params | ||
// These are the two parameters which are usually explore in fitting in this model | ||
const float global_speed = params(0); | ||
const float global_coupling = params(1); | ||
|
||
// regular constants | ||
const float a = 1.0; | ||
const float b = 3.0; | ||
const float c = 1.0; | ||
const float d = 5.0; | ||
const float r = 0.00035; | ||
const float s = 4.0; | ||
const float x0 = -1.6; | ||
const float Iext = 3.1; | ||
const float slope = 0.; | ||
const float Iext2 = 3.1; | ||
const float tau = 10.0; | ||
const float aa = 6.0; | ||
const float bb = 2.0; | ||
const float Kvf = 0.0; | ||
const float Kf = 0.0; | ||
const float Ks = 0.0; | ||
const float tt = 1.0; | ||
const float modification = 1.0; | ||
|
||
// coupling constants, coupling itself is hardcoded in kernel | ||
const float c_a = 1; | ||
|
||
// coupling parameters | ||
float c_pop1 = 0.0; | ||
float c_pop2 = 0.0; | ||
|
||
// derived parameters | ||
const float rec_n = 1 / n_node; | ||
const float rec_speed_dt = 1.0f / global_speed / (dt); | ||
const float nsig = sqrt(dt) * sqrt(2.0 * 1e-5); | ||
|
||
|
||
// conditional_derived variable declaration | ||
float ydot0 = 0.0; | ||
float ydot2 = 0.0; | ||
float h = 0.0; | ||
float ydot4 = 0.0; | ||
|
||
curandState crndst; | ||
curand_init(id * (blockDim.x * gridDim.x * gridDim.y), 0, 0, &crndst); | ||
|
||
float x1 = 0.0; | ||
float y1 = 0.0; | ||
float z = 0.0; | ||
float x2 = 0.0; | ||
float y2 = 0.0; | ||
float g = 0.0; | ||
|
||
//***// This is only initialization of the observable | ||
for (unsigned int i_node = 0; i_node < n_node; i_node++) | ||
{ | ||
tavg(i_node) = 0.0f; | ||
if (i_step == 0){ | ||
state(i_step, i_node) = 0.001; | ||
} | ||
} | ||
|
||
//***// This is the loop over time, should stay always the same | ||
for (unsigned int t = i_step; t < (i_step + n_step); t++) | ||
{ | ||
//***// This is the loop over nodes, which also should stay the same | ||
for (unsigned int i_node = threadIdx.y; i_node < n_node; i_node+=blockDim.y) | ||
{ | ||
c_pop1 = 0.0f; | ||
c_pop2 = 0.0f; | ||
|
||
x1 = state((t) % nh, i_node + 0 * n_node); | ||
y1 = state((t) % nh, i_node + 1 * n_node); | ||
z = state((t) % nh, i_node + 2 * n_node); | ||
x2 = state((t) % nh, i_node + 3 * n_node); | ||
y2 = state((t) % nh, i_node + 4 * n_node); | ||
g = state((t) % nh, i_node + 5 * n_node); | ||
|
||
// This variable is used to traverse the weights and lengths matrix, which is really just a vector. It is just a displacement. / | ||
unsigned int i_n = i_node * n_node; | ||
|
||
for (unsigned int j_node = 0; j_node < n_node; j_node++) | ||
{ | ||
//***// Get the weight of the coupling between node i and node j | ||
float wij = weights[i_n + j_node]; // nb. not coalesced | ||
if (wij == 0.0) | ||
continue; | ||
|
||
//***// Get the delay between node i and node j | ||
unsigned int dij = lengths[i_n + j_node] * rec_speed_dt; | ||
|
||
//***// Get the state of node j which is delayed by dij | ||
float x1_j = state(((t - dij + nh) % nh), j_node + 0 * n_node); | ||
|
||
// Sum it all together using the coupling function. Kuramoto coupling: (postsyn * presyn) == ((a) * (sin(xj - xi))) | ||
c_pop1 += wij * c_a * sin(x1_j - x1); | ||
|
||
} // j_node */ | ||
|
||
// rec_n is used for the scaling over nodes | ||
c_pop1 *= global_coupling * coupling; | ||
c_pop2 *= g; | ||
|
||
if (x1 < 0.0) | ||
// The conditional variables | ||
ydot0 = -a * powf(x1, 2) + b * x1; | ||
else | ||
ydot0 = slope - x2 + 0.6 * powf((z - 4),2); | ||
if (z < 0.0) | ||
// The conditional variables | ||
ydot2 = - 0.1 * (powf(z, 7)); | ||
else | ||
ydot2 = 0; | ||
if (modification) | ||
// The conditional variables | ||
h = x0 + 3. / (1. + exp(-(x1 + 0.5) / 0.1)); | ||
else | ||
h = 4 * (x1 - x0) + ydot2; | ||
if (x2 < -0.25) | ||
// The conditional variables | ||
ydot4 = 0.0; | ||
else | ||
ydot4 = aa * (x2 + 0.25); | ||
// This is dynamics step and the update in the state of the node | ||
x1 += dt * (tt * (y1 - z + Iext + Kvf * c_pop1 + ydot0 )); | ||
y1 += dt * (tt * (c - d * powf(x1, 2) - y1)); | ||
z += dt * (tt * (r * (h - z + Ks * c_pop1))); | ||
x2 += dt * (tt * (-y2 + x2 - powf(x2, 3) + Iext2 + bb * g - 0.3 * (z - 3.5) + Kf * c_pop2)); | ||
y2 += dt * (tt * (-y2 + ydot4) / tau); | ||
g += dt * (tt * (-0.01 * (g - 0.1 * x1) )); | ||
|
||
// Add noise (if noise components are present in model), integrate with stochastic forward euler and wrap it up | ||
x1 += nsig * curand_normal2(&crndst).x; | ||
y1 += nsig * curand_normal2(&crndst).x; | ||
z += nsig * curand_normal2(&crndst).x; | ||
x2 += nsig * curand_normal2(&crndst).x; | ||
y2 += nsig * curand_normal2(&crndst).x; | ||
g += nsig * curand_normal2(&crndst).x; | ||
|
||
// Wrap it within the limits of the model | ||
x1 = wrap_it_x1(x1); | ||
y1 = wrap_it_y1(y1); | ||
z = wrap_it_z(z); | ||
x2 = wrap_it_x2(x2); | ||
y2 = wrap_it_y2(y2); | ||
g = wrap_it_g(g); | ||
|
||
// Update the state | ||
state((t + 1) % nh, i_node + 0 * n_node) = x1; | ||
state((t + 1) % nh, i_node + 1 * n_node) = y1; | ||
state((t + 1) % nh, i_node + 2 * n_node) = z; | ||
state((t + 1) % nh, i_node + 3 * n_node) = x2; | ||
state((t + 1) % nh, i_node + 4 * n_node) = y2; | ||
state((t + 1) % nh, i_node + 5 * n_node) = g; | ||
|
||
// Update the observable only for the last timestep | ||
if (t == (i_step + n_step - 1)){ | ||
tavg(i_node + 0 * n_node) = x1; | ||
} | ||
|
||
// sync across warps executing nodes for single sim, before going on to next time step | ||
__syncthreads(); | ||
|
||
} // for i_node | ||
} // for t | ||
|
||
// cleanup macros/*{{{*/ | ||
#undef params | ||
#undef state | ||
#undef tavg/*}}}*/ | ||
|
||
} // kernel integrate |
Oops, something went wrong.