Skip to content

Commit

Permalink
CUDA script
Browse files Browse the repository at this point in the history
  • Loading branch information
joott committed Jun 16, 2022
1 parent 8566df7 commit 88f360c
Showing 1 changed file with 108 additions and 0 deletions.
108 changes: 108 additions & 0 deletions modelB_CUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
cd(@__DIR__)

using Distributions
using Printf
using FFTW
using JLD2
using Random
using LinearAlgebra
using CUDA
using BenchmarkTools

Random.seed!(parse(Int, ARGS[3]))

const L = parse(Int, ARGS[2]) # must be a multiple of 4
const λ = 4.0e0
const Γ = 1.0e0
const T = 1.0e0

const Δt = 0.04e0/Γ
const Rate = Float64(sqrt(2.0*Δt*Γ))
ξ = Normal(0.0e0, 1.0e0)

function hotstart(n)
rand(ξ, n, n, n)
end

function ΔH(x, ϕ, q, m², L)
@inbounds ϕold = ϕ[x...]
ϕt = ϕold + q
Δϕ = ϕt - ϕold
Δϕ² = ϕt^2 - ϕold^2

@inbounds ∑nn = ϕ[x[1]%L+1, x[2], x[3]] + ϕ[x[1], x[2]%L+1, x[3]] + ϕ[x[1], x[2], x[3]%L+1] + ϕ[(x[1]+L-2)%L+1, x[2], x[3]] + ϕ[x[1], (x[2]+L-2)%L+1, x[3]] + ϕ[x[1], x[2], (x[3]+L-2)%L+1]

return 3Δϕ² - Δϕ * ∑nn + 0.5* Δϕ² + 0.25λ * (ϕt^4 - ϕold^4)
end

function step(m², ϕ, x1, x2, L, R, r_i)
q = Rate*R[r_i]

δH = ΔH(x1, ϕ, q, m², L) + ΔH(x2, ϕ, -q, m², L) + q^2
P = min(1.0f0, exp(-δH))
r = rand()

@inbounds ϕ[x1...] += q * (r<P)
@inbounds ϕ[x2...] -= q * (r<P)
end

function sweep(m², ϕ, R, kernel, threads, blocks)
#=
n=0 : (i,j,k)->(x,y,z)
n=1 : (i,j,k)->(y,z,x)
n=2 : (i,j,k)->(z,x,y)
pairs are in i direction
=#
for n in 0:2, m in 1:4
kernel(m², ϕ, L, n, m, R; threads, blocks)
end
end

function gpu_sweep(m², ϕ, L, n, m, R)
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x - 1
stride = gridDim().x * blockDim().x

for l in index:stride:L^3÷4-1
i = l ÷ L^2
j = (l÷L) % L
k = l%L

tmp = (4i + 2j + m%2, j + k + m÷2, k)
(x, y, z) = (tmp[n+1], tmp[(n+1)%3+1], tmp[(n+2)%3+1])
x1 = (x%L+1, y%L+1, z%L+1)
x2 = ((x + (n==0))%L+1, (y + (n==1))%L+1, (z + (n==2))%L+1)

r_i = L^3÷4 * (n + 3(m-1)) + l + 1
step(m², ϕ, x1, x2, L, R, r_i)
end
return
end

function thermalize(m², ϕ, kernel, threads, blocks, N=10000)
R = CUDA.randn(N*3L^3)
for i in 1:N
sweep(m², ϕ, R[range((i-1)*3L^3+1, length=3L^3)], kernel, threads, blocks)
end
end

= -2.28587

ϕ = hotstart(L)
ϕ .= ϕ .- shuffle(ϕ)
ϕ = CuArray(ϕ)

N = 1024

R = CUDA.randn(3L^3)

kernel = @cuda launch=false gpu_sweep(m², ϕ, L, 0, 1, R)
config = launch_configuration(kernel.fun)
threads = min(N, config.threads)
blocks = cld(N, threads)

maxt = L^2

for i in 0:maxt
thermalize(m², ϕ, kernel, threads, blocks, 4*L^2)
jldsave("/share/tmschaef/jkott/modelB/KZ/IC_crit_L_$L"*"_id_"*ARGS[1]*".jld2", true; ϕ=ϕ, m2=m², i=i)
end

0 comments on commit 88f360c

Please sign in to comment.