Skip to content

Commit

Permalink
Some GPU improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
albop committed Nov 16, 2024
1 parent a0b5413 commit ade6b3b
Show file tree
Hide file tree
Showing 14 changed files with 141 additions and 96 deletions.
9 changes: 5 additions & 4 deletions ext/DoloCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module DoloCUDAExt

using CUDA
using Dolo
# using Dolo
# should it be merged with the general definition?
import Dolo: GVector, distance
# import CUDA: CuArray

import CUDA: CuArray

Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
norm(x::GVector{G, A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data)


end
1 change: 1 addition & 0 deletions ext/DolooneAPIExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module DolooneAPIExt
using Dolo: distance

Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
norm(x::GVector{G, A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data)

import Dolo.splines: prefilter!

Expand Down
2 changes: 1 addition & 1 deletion src/algos/simul.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ end


# TODO We should differentiate here whether dproc has a markov chain or something else
function τ(dmodel::Dolo.DYModel{M}, ss::T, a::SVector) where M<:Union{Dolo.YModel{<:Dolo.VAR1},Dolo.YModel{<:Dolo.MarkovChain}} where T<:QP
function τ(dmodel::Dolo.DYModel{M}, ss::T, a::SVector) where M<:Union{Dolo.AModel{<:Dolo.VAR1},Dolo.AModel{<:Dolo.MarkovChain}} where T<:QP

(i,_) = ss.loc
s_ = ss.val
Expand Down
27 changes: 20 additions & 7 deletions src/algos/time_iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ function F(dmodel::ADModel, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DF
r = zero(SVector{d,T})

for (w,S) in τ(dmodel, s, x)
# return w,S
r += w*arbitrage(dmodel,s,x,S,φ(S))
end

Expand All @@ -33,6 +34,7 @@ function F(dmodel::ADModel, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DF
# w*arbitrage(model,s,x,S,φ(S))
# for (w,S) in τ(model, s, x)
# )

r::SVector{d,T}
r = complementarities(dmodel.model, s,x,r)
r
Expand Down Expand Up @@ -191,12 +193,15 @@ function time_iteration(model::DYModel,
engine=:none
)


log = IterationLog(
it = ("n", Int),
err = ("ϵₙ=|F(xₙ,xₙ)|", Float64),
sa = ("ηₙ=|xₙ-xₙ₋₁|", Float64),
lam = ("λₙ=ηₙ/ηₙ₋₁",Float64),
elapsed = ("Time", Float64)
elapsed = ("Time", Float64),
k_newton = ("Newton", Int64),
k = ("Backsteps", Int64)
)
initialize(log, verbose=verbose; message="Time Iteration")

Expand Down Expand Up @@ -226,6 +231,8 @@ function time_iteration(model::DYModel,
if improve
J_2 = workspace.L
end
local k_newton_ = 0
local k_ = 0

for t=1:T

Expand Down Expand Up @@ -253,7 +260,10 @@ function time_iteration(model::DYModel,

x1.data .= x0.data

for k=1:max_bsteps

for k_newton=1:max_bsteps

#newton steaps

F!(r0, model, x1, φ, t_engine)

Expand All @@ -273,15 +283,18 @@ function time_iteration(model::DYModel,


for k=0:mbsteps
x2.data .= x1.data .- dx.data .* lam^k

# fails on GPU
x2.data .= x1.data .- dx.data .* convert(Tf,lam^k)
F!(r0, model, x2, φ, t_engine)

ε_b = norm(r0)
if ε_b<ε_n
iterations = t
k_newton_ = k_newton
k_ = k
break
end

end

x1.data .= x2.data
Expand All @@ -297,7 +310,7 @@ function time_iteration(model::DYModel,
elapsed = time_ns() - t1
elapsed /= 1000000000

verbose ? append!(log; verbose=verbose, it=t-1, err=ε, sa=η_0, lam=gain, elapsed=elapsed) : nothing
verbose ? append!(log; verbose=verbose, it=t-1, err=ε, sa=η_0, lam=gain, elapsed=elapsed, k_newton=k_newton_, k=k_) : nothing

if η < tol_η
iterations = t
Expand All @@ -316,9 +329,9 @@ function time_iteration(model::DYModel,
J_1 = J

Dolo.dF_1!(J_1, model, x1, φ, t_engine)

Dolo.dF_2!(J_2, model, x1, φ, t_engine)
# J_2 = Dolo.dF_2(model, x1, φ)


mul!(J_2, convert(Tf,-1.0))

Expand All @@ -328,7 +341,7 @@ function time_iteration(model::DYModel,
d = (x1-x0)

# return (J_1, J_2, d)
rhs = neumann(Tp, d; K=improve_K)
rhs = neumann(Tp, d; K=improve_K, t_engine=t_engine)

x0.data .+= rhs.data

Expand Down
59 changes: 16 additions & 43 deletions src/algos/time_iteration_accelerated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,30 @@ import KernelAbstractions: get_backend

get_backend(g::GArray) = get_backend(g.data)



# This is for a PGrid model only
function F!(r, model, x, φ, engine)

@kernel function FF_(r, @Const(dm), @Const(x), @Const(φ))

n = @index(Global, Linear)

ind = @index(Global, Cartesian)
(i,j) = ind.I

# k = length(dm.grid.grids[1])
# i_ = (n-1)÷k
# j_ = (n-1) - (i)*κ
# i = i_+1
# j = j_+1

# TODO: special function here
s_ = dm.grid[n]
s = Dolo.QP((i,j), s_)
s = dm.grid[n+im]

xx = x[n]

# (i,j) = @inline Dolo.from_linear(model.grid, n)


rr = Dolo.F(dm, s, xx, φ)

r[n] = rr

end

fun_cpu = FF_(engine)



sz = size(model.grid)

fun_cpu = FF_(engine, 1000)

res = fun_cpu(r, model, x, φ; ndrange=sz)
fun_cpu(r, model, x, φ; ndrange=sz)
synchronize(engine)


end
Expand All @@ -57,23 +43,13 @@ function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine)


n = @index(Global, Linear)
ind = @index(Global, Cartesian)
(i,j) = ind.I

# k = length(dm.grid.grids[1])
# i_ = (n-1)÷k
# j_ = (n-1) - (i)*κ
# i = i_+1
# j = j_+1

# TODO: special function here
s_ = dm.grid[n]
s = Dolo.QP((i,j), s_)
s = dm.grid[n+im]

xx = x[n]

# (i,j) = @inline Dolo.from_linear(model.grid, n)


rr = ForwardDiff.jacobian(u->F(model, s, u, φ), xx)

r[n] = rr
Expand All @@ -91,23 +67,20 @@ end #### no alloc

function dF_2!(out, dmodel, controls::GArray, φ::DFun, engine)

@kernel function FF_(L,@Const(dm), @Const(x),@Const(φ) )
@kernel function FF_(L, @Const(dm), @Const(x),@Const(φ) )


n = @index(Global, Linear)
ind = @index(Global, Cartesian)
(i,j) = ind.I

# TODO: special function here
s_ = dm.grid[n]
s = Dolo.QP((i,j), s_)
s = dm.grid[n+im]

xx = x[n]


# r0 = sum( w*arbitrage(dmodel,s,xx,S,φ(S)) for (w,S) in τ(dmodel, s, xx) )
r0 = F(dmodel, s, xx, φ)
r_F = ForwardDiff.jacobian(
r->complementarities(dmodel.model, s,xx,r),
sum( w*arbitrage(dmodel,s,xx,S,φ(S)) for (w,S) in τ(dmodel, s, xx) ),
r0
)

tt = tuple(
Expand All @@ -124,10 +97,10 @@ function dF_2!(out, dmodel, controls::GArray, φ::DFun, engine)

end

fun_cpu = FF_(engine)
fun = FF_(engine)
sz = size(dmodel.grid)

res = fun_cpu(out, dmodel, controls, φ; ndrange=sz)
fun(out, dmodel, controls, φ; ndrange=sz)

nothing

Expand Down
36 changes: 30 additions & 6 deletions src/conversion.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,36 @@
using StaticArrays: SMatrix

function convert_precision(T, exogenous::Dolo.MarkovChain)

fun = u->T(u)
P = fun.(exogenous.P)
Q = SVector((fun.(e) for e in exogenous.Q)...)
vars = Dolo.variables(exogenous)
Dolo.MarkovChain(vars, P, Q)

end

function convert_precision(T, var::Dolo.VAR1)

d = size(var.Σ,1)

vars = Dolo.variables(var)

ρ = convert(T, var.ρ)
Σ = SMatrix{d,d,T,d*d}(var.Σ)

Dolo.VAR1(vars,ρ,Σ)

end

function convert_precision(T, model::Dolo.YModel)

# convert calibration
calibration = NamedTuple( ((a,T(b)) for (a,b) in pairs(model.calibration) ) )

# convert exogenous shock
fun = u->T(u)
P = fun.(model.exogenous.P)
Q = SVector((fun.(e) for e in model.exogenous.Q)...)

vars = Dolo.variables(model.exogenous)
exogenous = Dolo.MarkovChain(vars, P, Q)
exogenous = convert_precision(T, model.exogenous)

# convert states and controls spaces
states = convert_precision(T, model.states)
Expand Down Expand Up @@ -46,4 +66,8 @@ function hypeof(model)
s = replace(string(typ),string(otyp)=>"Float32")
gtyp = ( Meta.parse(s) )
return Union{eval(gtyp), typ}
end
end


getprecision(model::Dolo.YModel) = typeof(model.calibration[1])
getprecision(dmodel::Dolo.DYModel) = getprecision(dmodel.model)
4 changes: 2 additions & 2 deletions src/dev_L2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,8 @@ function neumann(L2::LF, r0; K=1000, τ_η=1e-10, t_engine=nothing)

mem = (;du, dv)

neumann!(dx, L2, r0, mem;
K=K, τ_η=τ_η)
neumann!(dx, L2, r0, t_engine;
mem = mem, K=K, τ_η=τ_η)

return dx

Expand Down
15 changes: 8 additions & 7 deletions src/dolo_model.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
struct YModel{C,A,B,D,N,S} <: AModel
struct YModel{N,C,A,B,D,S} <: AModel{C}
states::A # must be exo \times endo
controls::B
exogenous::C
Expand All @@ -7,11 +7,11 @@ struct YModel{C,A,B,D,N,S} <: AModel
end

YModel(N,A,B,C,D) = let
YModel{typeof(C),typeof(A),typeof(B),typeof(D),N,Nothing}(A,B,C,D,nothing)
YModel{N,typeof(C),typeof(A),typeof(B),typeof(D),Nothing}(A,B,C,D,nothing)
end
YModel(N,A,B,C,D,S) = YModel{typeof(C),typeof(A),typeof(B),typeof(D),N,typeof(S)}(A,B,C,D,S)
YModel(N,A,B,C,D,S) = YModel{N,typeof(C),typeof(A),typeof(B),typeof(D),typeof(S)}(A,B,C,D,S)

name(::YModel{C,A,B,D,N}) where C where A where B where D where N = N
name(::YModel{N,C,A,B,D}) where C where A where B where D where N = N

bounds(model::YModel, s) = model.controls

Expand Down Expand Up @@ -63,7 +63,7 @@ name(dm::DYModel) = name(dm.model)

bounds(dmodel::DYModel, s) = bounds(dmodel.model, s)

function discretize(model::YModel{<:MvNormal}, d=Dict())
function discretize(model::AModel{<:MvNormal}, d=Dict())
exo = get(d, :exo, Dict())
endo = get(d, :endo, Dict())

Expand All @@ -72,7 +72,8 @@ function discretize(model::YModel{<:MvNormal}, d=Dict())
return DYModel(model, grid, dist)
end

function discretize(model::YModel{<:VAR1}, d=Dict())
function discretize(model::AModel{<:VAR1}, d=Dict())

exo = get(d, :exo, Dict())
endo = get(d, :endo, Dict())
dvar = discretize(model.exogenous, exo)
Expand All @@ -92,7 +93,7 @@ function discretize(model::YModel{<:VAR1}, d=Dict())
return DYModel(model, grid, dvar)
end

function discretize(model::YModel{<:MarkovChain}, d=Dict())
function discretize(model::AModel{<:MarkovChain}, d=Dict())
# exo = get(d, :exo, Dict())
endo = get(d, :endo, Dict())
dvar = discretize(model.exogenous)
Expand Down
2 changes: 1 addition & 1 deletion src/model.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# abstract model
abstract type AbstractModel end
abstract type AbstractModel{Exo} end
abstract type AbstractDModel end

const AModel = AbstractModel
Expand Down
2 changes: 1 addition & 1 deletion src/model_extensions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function transition(model::YModel, s::SVector, x::SVector, M::SVector)
ss = NamedTuple{variables(model.states)}(s)
xx = NamedTuple{variables(model.controls)}(x)
MM = NamedTuple{variables(model.exogenous)}(M)

SS = transition(model,ss,xx,MM)

return SVector(SS...)
Expand Down
Loading

0 comments on commit ade6b3b

Please sign in to comment.