Some GPU improvements.

EconForge · Nov 16, 2024 · ade6b3b · ade6b3b
1 parent a0b5413
commit ade6b3b
Show file tree

Hide file tree

Showing 14 changed files with 141 additions and 96 deletions.
diff --git a/ext/DoloCUDAExt.jl b/ext/DoloCUDAExt.jl
@@ -1,12 +1,13 @@
 module DoloCUDAExt
 
     using CUDA
-    using Dolo
+    # using Dolo
 # should it be merged with the general definition?
+    import Dolo: GVector, distance
+    # import CUDA: CuArray
 
-    import CUDA: CuArray
-
-    Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
+    distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
+    norm(x::GVector{G, A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data)
 
 
 end
diff --git a/ext/DolooneAPIExt.jl b/ext/DolooneAPIExt.jl
@@ -7,6 +7,7 @@ module DolooneAPIExt
     using Dolo: distance
 
     Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data)
+    norm(x::GVector{G, A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data)
 
     import Dolo.splines: prefilter!
 

diff --git a/src/algos/simul.jl b/src/algos/simul.jl
@@ -7,7 +7,7 @@ end
 
 
 # TODO We should differentiate here whether dproc has a markov chain or something else
-function τ(dmodel::Dolo.DYModel{M}, ss::T, a::SVector) where M<:Union{Dolo.YModel{<:Dolo.VAR1},Dolo.YModel{<:Dolo.MarkovChain}}  where T<:QP
+function τ(dmodel::Dolo.DYModel{M}, ss::T, a::SVector) where M<:Union{Dolo.AModel{<:Dolo.VAR1},Dolo.AModel{<:Dolo.MarkovChain}}  where T<:QP
 
     (i,_) = ss.loc
     s_ = ss.val

diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl
@@ -24,6 +24,7 @@ function F(dmodel::ADModel, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DF
     r = zero(SVector{d,T})
 
     for (w,S) in τ(dmodel, s, x)
+        # return w,S
         r += w*arbitrage(dmodel,s,x,S,φ(S)) 
     end
 
@@ -33,6 +34,7 @@ function F(dmodel::ADModel, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DF
     #      w*arbitrage(model,s,x,S,φ(S)) 
     #      for (w,S) in τ(model, s, x)
     # )
+
     r::SVector{d,T}
     r = complementarities(dmodel.model, s,x,r)
     r
@@ -191,12 +193,15 @@ function time_iteration(model::DYModel,
     engine=:none
 )
 
+
     log = IterationLog(
             it = ("n", Int),
             err =  ("ϵₙ=|F(xₙ,xₙ)|", Float64),
             sa =  ("ηₙ=|xₙ-xₙ₋₁|", Float64),
             lam = ("λₙ=ηₙ/ηₙ₋₁",Float64),
-            elapsed = ("Time", Float64)
+            elapsed = ("Time", Float64),
+            k_newton = ("Newton", Int64),
+            k = ("Backsteps", Int64)
         )    
     initialize(log, verbose=verbose; message="Time Iteration")
 
@@ -226,6 +231,8 @@ function time_iteration(model::DYModel,
     if improve
         J_2 = workspace.L
     end
+    local k_newton_ = 0
+    local k_ = 0
 
     for t=1:T
 
@@ -253,7 +260,10 @@ function time_iteration(model::DYModel,
 
         x1.data .= x0.data
 
-        for k=1:max_bsteps
+
+        for k_newton=1:max_bsteps
+
+            #newton steaps
 
             F!(r0, model, x1,  φ, t_engine)
 
@@ -273,15 +283,18 @@ function time_iteration(model::DYModel,
 
 
             for k=0:mbsteps
-                x2.data .= x1.data .- dx.data .* lam^k
 
+                # fails on GPU
+                x2.data .= x1.data .- dx.data .* convert(Tf,lam^k)
                 F!(r0, model, x2,  φ, t_engine)
-
                 ε_b = norm(r0)
                 if ε_b<ε_n
                     iterations = t
+                    k_newton_ = k_newton
+                    k_ = k
                     break
                 end
+
             end
 
             x1.data .= x2.data
@@ -297,7 +310,7 @@ function time_iteration(model::DYModel,
         elapsed = time_ns() - t1
         elapsed /= 1000000000
 
-        verbose ? append!(log; verbose=verbose, it=t-1, err=ε, sa=η_0, lam=gain, elapsed=elapsed) : nothing
+        verbose ? append!(log; verbose=verbose, it=t-1, err=ε, sa=η_0, lam=gain, elapsed=elapsed, k_newton=k_newton_, k=k_) : nothing
 
         if η < tol_η
             iterations = t
@@ -316,9 +329,9 @@ function time_iteration(model::DYModel,
             J_1 = J
 
             Dolo.dF_1!(J_1, model, x1, φ, t_engine)
+
             Dolo.dF_2!(J_2, model, x1, φ, t_engine)
             # J_2 = Dolo.dF_2(model, x1, φ)
-
 
             mul!(J_2, convert(Tf,-1.0))
 
@@ -328,7 +341,7 @@ function time_iteration(model::DYModel,
             d = (x1-x0)
 
             # return (J_1, J_2, d)
-            rhs =  neumann(Tp, d; K=improve_K)
+            rhs =  neumann(Tp, d; K=improve_K, t_engine=t_engine)
 
             x0.data .+= rhs.data
 

diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl
@@ -7,44 +7,30 @@ import KernelAbstractions: get_backend
 
 get_backend(g::GArray) = get_backend(g.data)
 
-
-
-# This is for a PGrid model only 
 function F!(r, model, x, φ, engine)
 
     @kernel function FF_(r, @Const(dm), @Const(x), @Const(φ))
 
         n = @index(Global, Linear)
-
-        ind = @index(Global, Cartesian)
-        (i,j) = ind.I
 
-        # k = length(dm.grid.grids[1])
-        # i_ = (n-1)÷k
-        # j_ = (n-1) - (i)*κ
-        # i = i_+1
-        # j = j_+1
-
         # TODO: special function here
-        s_ = dm.grid[n]
-        s = Dolo.QP((i,j), s_)
+        s = dm.grid[n+im]
 
         xx = x[n]
-
-        # (i,j) = @inline Dolo.from_linear(model.grid, n)    
-
+
         rr = Dolo.F(dm, s, xx, φ)
 
         r[n] = rr
 
     end
 
-    fun_cpu = FF_(engine)
-
-
+
     sz = size(model.grid)
+
+    fun_cpu = FF_(engine, 1000)
 
-    res = fun_cpu(r, model, x, φ; ndrange=sz)
+    fun_cpu(r, model, x, φ; ndrange=sz)
+    synchronize(engine)
 
 
 end
@@ -57,23 +43,13 @@ function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine)
 
 
         n = @index(Global, Linear)
-        ind = @index(Global, Cartesian)
-        (i,j) = ind.I
 
-        # k = length(dm.grid.grids[1])
-        # i_ = (n-1)÷k
-        # j_ = (n-1) - (i)*κ
-        # i = i_+1
-        # j = j_+1
-
         # TODO: special function here
-        s_ = dm.grid[n]
-        s = Dolo.QP((i,j), s_)
+        s = dm.grid[n+im]
 
         xx = x[n]
 
-        # (i,j) = @inline Dolo.from_linear(model.grid, n)    
-
+
         rr = ForwardDiff.jacobian(u->F(model, s, u, φ), xx)
 
         r[n] = rr
@@ -91,23 +67,20 @@ end    #### no alloc
 
 function dF_2!(out, dmodel, controls::GArray, φ::DFun, engine)
 
-    @kernel function FF_(L,@Const(dm), @Const(x),@Const(φ) )
+    @kernel function FF_(L, @Const(dm), @Const(x),@Const(φ) )
 
 
         n = @index(Global, Linear)
-        ind = @index(Global, Cartesian)
-        (i,j) = ind.I
 
-        # TODO: special function here
-        s_ = dm.grid[n]
-        s = Dolo.QP((i,j), s_)
+        s = dm.grid[n+im]
 
         xx = x[n]
-
 
+        # r0 = sum( w*arbitrage(dmodel,s,xx,S,φ(S)) for (w,S) in τ(dmodel, s, xx) )
+        r0 = F(dmodel, s, xx, φ)
         r_F = ForwardDiff.jacobian(
             r->complementarities(dmodel.model, s,xx,r),
-            sum( w*arbitrage(dmodel,s,xx,S,φ(S)) for (w,S) in τ(dmodel, s, xx) ),
+            r0
         )
 
         tt = tuple(
@@ -124,10 +97,10 @@ function dF_2!(out, dmodel, controls::GArray, φ::DFun, engine)
 
     end
 
-    fun_cpu = FF_(engine)
+    fun = FF_(engine)
     sz = size(dmodel.grid)
 
-    res = fun_cpu(out, dmodel, controls, φ; ndrange=sz)
+    fun(out, dmodel, controls, φ; ndrange=sz)
 
     nothing
 

diff --git a/src/conversion.jl b/src/conversion.jl
@@ -1,16 +1,36 @@
+using StaticArrays: SMatrix
+
+function convert_precision(T, exogenous::Dolo.MarkovChain)
+
+    fun = u->T(u)
+    P = fun.(exogenous.P)
+    Q = SVector((fun.(e) for e in exogenous.Q)...)
+    vars = Dolo.variables(exogenous)
+    Dolo.MarkovChain(vars, P, Q)
+
+end
+
+function convert_precision(T, var::Dolo.VAR1)
+
+    d = size(var.Σ,1)
+
+    vars = Dolo.variables(var)
+
+    ρ = convert(T, var.ρ)
+    Σ = SMatrix{d,d,T,d*d}(var.Σ)
+
+    Dolo.VAR1(vars,ρ,Σ)
+
+end
 
 function convert_precision(T, model::Dolo.YModel)
 
     # convert calibration
     calibration = NamedTuple( ((a,T(b)) for (a,b) in pairs(model.calibration) ) )
 
     # convert exogenous shock
-    fun = u->T(u)
-    P = fun.(model.exogenous.P)
-    Q = SVector((fun.(e) for e in model.exogenous.Q)...)
 
-    vars = Dolo.variables(model.exogenous)
-    exogenous = Dolo.MarkovChain(vars, P, Q)
+    exogenous = convert_precision(T, model.exogenous)
 
     # convert states and controls spaces
     states = convert_precision(T, model.states)
@@ -46,4 +66,8 @@ function hypeof(model)
     s = replace(string(typ),string(otyp)=>"Float32")
     gtyp = ( Meta.parse(s) )
     return Union{eval(gtyp), typ}
-end
+end
+
+
+getprecision(model::Dolo.YModel) = typeof(model.calibration[1])
+getprecision(dmodel::Dolo.DYModel) = getprecision(dmodel.model)
diff --git a/src/dev_L2.jl b/src/dev_L2.jl
@@ -217,8 +217,8 @@ function neumann(L2::LF, r0; K=1000, τ_η=1e-10, t_engine=nothing)
 
     mem = (;du, dv)
 
-    neumann!(dx, L2, r0, mem;
-        K=K, τ_η=τ_η)
+    neumann!(dx, L2, r0, t_engine;
+        mem = mem, K=K, τ_η=τ_η)
 
     return dx
 

diff --git a/src/dolo_model.jl b/src/dolo_model.jl
@@ -1,4 +1,4 @@
-struct YModel{C,A,B,D,N,S} <: AModel
+struct YModel{N,C,A,B,D,S} <: AModel{C}
     states::A         # must be exo \times endo
     controls::B
     exogenous::C
@@ -7,11 +7,11 @@ struct YModel{C,A,B,D,N,S} <: AModel
 end 
 
 YModel(N,A,B,C,D) = let
-    YModel{typeof(C),typeof(A),typeof(B),typeof(D),N,Nothing}(A,B,C,D,nothing)
+    YModel{N,typeof(C),typeof(A),typeof(B),typeof(D),Nothing}(A,B,C,D,nothing)
 end
-YModel(N,A,B,C,D,S) = YModel{typeof(C),typeof(A),typeof(B),typeof(D),N,typeof(S)}(A,B,C,D,S)
+YModel(N,A,B,C,D,S) = YModel{N,typeof(C),typeof(A),typeof(B),typeof(D),typeof(S)}(A,B,C,D,S)
 
-name(::YModel{C,A,B,D,N}) where C where A where B where D where N = N
+name(::YModel{N,C,A,B,D}) where C where A where B where D where N = N
 
 bounds(model::YModel, s) = model.controls
 
@@ -63,7 +63,7 @@ name(dm::DYModel) = name(dm.model)
 
 bounds(dmodel::DYModel, s) = bounds(dmodel.model, s)
 
-function discretize(model::YModel{<:MvNormal}, d=Dict())
+function discretize(model::AModel{<:MvNormal}, d=Dict())
     exo = get(d, :exo, Dict())
     endo = get(d, :endo, Dict())
 
@@ -72,7 +72,8 @@ function discretize(model::YModel{<:MvNormal}, d=Dict())
     return DYModel(model, grid, dist)
 end
 
-function discretize(model::YModel{<:VAR1}, d=Dict())
+function discretize(model::AModel{<:VAR1}, d=Dict())
+
     exo = get(d, :exo, Dict())
     endo = get(d, :endo, Dict())
     dvar = discretize(model.exogenous, exo)
@@ -92,7 +93,7 @@ function discretize(model::YModel{<:VAR1}, d=Dict())
     return DYModel(model, grid, dvar)
 end
 
-function discretize(model::YModel{<:MarkovChain}, d=Dict())
+function discretize(model::AModel{<:MarkovChain}, d=Dict())
     # exo = get(d, :exo, Dict())
     endo = get(d, :endo, Dict())
     dvar = discretize(model.exogenous)

diff --git a/src/model.jl b/src/model.jl
@@ -1,5 +1,5 @@
 # abstract model
-abstract type AbstractModel end
+abstract type AbstractModel{Exo} end
 abstract type AbstractDModel end
 
 const AModel = AbstractModel

diff --git a/src/model_extensions.jl b/src/model_extensions.jl
@@ -23,7 +23,7 @@ function transition(model::YModel, s::SVector, x::SVector, M::SVector)
     ss = NamedTuple{variables(model.states)}(s)
     xx = NamedTuple{variables(model.controls)}(x)
     MM = NamedTuple{variables(model.exogenous)}(M)
-
+    
     SS = transition(model,ss,xx,MM)
 
     return SVector(SS...)