From 7aab2256e8195e1ef525e6654c4cb7800ae4111e Mon Sep 17 00:00:00 2001
From: Francesc Verdugo <f.verdugo.rojano@vu.nl>
Date: Tue, 6 Feb 2024 18:18:37 +0100
Subject: [PATCH] Performance enhancements in example001

---
 .../GalerkinToolkitExamples/Project.toml      |  2 +
 .../GalerkinToolkitExamples/src/example001.jl | 60 ++++++++++++++-----
 .../test/example001_tests.jl                  |  9 +--
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/extensions/GalerkinToolkitExamples/Project.toml b/extensions/GalerkinToolkitExamples/Project.toml
index c89b61df..94faabe8 100644
--- a/extensions/GalerkinToolkitExamples/Project.toml
+++ b/extensions/GalerkinToolkitExamples/Project.toml
@@ -6,8 +6,10 @@ version = "0.1.0"
 [deps]
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 GalerkinToolkit = "5e3ba9c4-dd81-444d-b69a-0e7bd7bf60a4"
+IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9"
+Preconditioners = "af69fa37-3177-5a40-98ee-561f696e4fcd"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
diff --git a/extensions/GalerkinToolkitExamples/src/example001.jl b/extensions/GalerkinToolkitExamples/src/example001.jl
index f68fb715..2102326c 100644
--- a/extensions/GalerkinToolkitExamples/src/example001.jl
+++ b/extensions/GalerkinToolkitExamples/src/example001.jl
@@ -9,6 +9,9 @@ using WriteVTK
 using PartitionedArrays: JaggedArray
 using TimerOutputs
 
+using Preconditioners
+using IterativeSolvers: cg!
+
 # This one implements a vanilla sequential iso-parametric Poisson solver by only
 # using the mesh interface.
 
@@ -63,6 +66,7 @@ function default_params()
     params[:integration_degree] = 2
     params[:solver] = lu_solver()
     params[:example_path] = joinpath(outdir,"example001")
+    params[:export_vtu] = true
     params
 end
 
@@ -74,6 +78,26 @@ function lu_solver()
     (;setup,setup!,solve!,finalize!)
 end
 
+function cg_amg_solver(;reltol=1.0e-6,verbose=false)
+    function setup(x,A,b)
+        Pl = AMGPreconditioner{SmoothedAggregation}(A)
+        cache = (;Pl,A)
+        cache
+    end
+    function solve!(x,setup,b)
+        (;Pl,A) = setup
+        fill!(x,0)
+        cg!(x, A, b;Pl,reltol,verbose)
+    end
+    function setup!(setup,A)
+        error("not implemented")
+    end
+    function finalize!(setup)
+        nothing
+    end
+    (;setup,solve!,setup!,finalize!)
+end
+
 function setup_dirichlet_bcs(params)
     mesh = params[:mesh]
     u = params[:u]
@@ -198,7 +222,7 @@ end
 
 function assemble_system(state)
     timer = state.timer
-    @timeit timer "assemble_sytem" args = assemble_sytem_coo(state)
+    @timeit timer "assemble_sytem_coo" args = assemble_sytem_coo(state)
     I,J,V,b = args
     n_dofs = state.dofs.n_dofs
     @timeit timer "sparse" A = sparse(I,J,V,n_dofs,n_dofs)
@@ -234,9 +258,9 @@ function assemble_sybmolic(state)
         end
     end
 
-    I_coo = zeros(Int,n_coo)
-    J_coo = zeros(Int,n_coo)
-    V_coo = zeros(Float64,n_coo)
+    I_coo = Vector{Int32}(undef,n_coo)
+    J_coo = Vector{Int32}(undef,n_coo)
+    V_coo = Vector{Float64}(undef,n_coo)
     b = zeros(Float64,state.dofs.n_dofs)
 
     n_coo = 0
@@ -247,12 +271,13 @@ function assemble_sybmolic(state)
             if !(dofs[i]>0)
                 continue
             end
+            dofs_i = dofs[i]
             for j in 1:ndofs
                 if !(dofs[j]>0)
                     continue
                 end
                 n_coo += 1
-                I_coo[n_coo] = dofs[i]
+                I_coo[n_coo] = dofs_i
                 J_coo[n_coo] = dofs[j]
             end
         end
@@ -281,8 +306,10 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state)
     ∇x = map(i->similar(i,size(i,2)),∇s)
     Aes = map(i->zeros(size(i,2),size(i,2)),∇s)
     fes = map(i->zeros(size(i,2)),∇s)
+    ∇st = map(m->collect(permutedims(m)),∇s)
+    st = map(m->collect(permutedims(m)),s)
 
-    Tx = SVector{d,Float64}
+    Tx = eltype(node_to_x)
     TJ = typeof(zero(Tx)*zero(Tx)')
 
     i_coo = 0
@@ -293,9 +320,9 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state)
         Ae = Aes[rid]
         fe = fes[rid]
         nl = length(nodes)
-        ∇se = ∇s[rid]
+        ∇ste = ∇st[rid]
         ∇xe = ∇x[rid]
-        se = s[rid]
+        ste = st[rid]
         we = w[rid]
         nq = length(we)
         fill!(Ae,zero(eltype(Ae)))
@@ -306,8 +333,8 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state)
             xint = zero(Tx)
             for k in 1:nl
                 x = node_to_x[nodes[k]]
-                ∇sqx = ∇se[iq,k]
-                sqx = se[iq,k]
+                ∇sqx = ∇ste[k,iq]
+                sqx = ste[k,iq]
                 Jt += ∇sqx*x'
                 xint += sqx*x
             end
@@ -315,16 +342,17 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state)
             invJt = inv(Jt)
             dV = abs(detJt)*we[iq]
             for k in 1:nl
-                ∇xe[k] = invJt*∇se[iq,k]
+                ∇xe[k] = invJt*∇ste[k,iq]
             end
             for j in 1:nl
+                ∇xej = ∇xe[j]
                 for i in 1:nl
-                    Ae[i,j] += ∇xe[i]⋅∇xe[j]*dV
+                    Ae[i,j] += ∇xe[i]⋅∇xej*dV
                 end
             end
             fx = f(xint)
             for k in 1:nl
-                fe[k] +=  fx*se[iq,k]*dV
+                fe[k] +=  fx*ste[k,iq]*dV
             end
         end
         # Set the result in the output array
@@ -445,7 +473,7 @@ function integrate_error_norms_loop(uh,state)
     ncells = length(cell_to_rid)
     ues = map(i->zeros(size(i,2)),∇s)
     ∇x = map(i->similar(i,size(i,2)),∇s)
-    Tx = SVector{d,Float64}
+    Tx = eltype(node_to_x)
     TJ = typeof(zero(Tx)*zero(Tx)')
     for cell in 1:ncells
         rid = cell_to_rid[cell]
@@ -505,6 +533,9 @@ function integrate_error_norms(results,uh,state)
 end
 
 function export_results(uh,params,state)
+    if ! params[:export_vtu]
+        return nothing
+    end
     mesh = params[:mesh]
     example_path = params[:example_path]
     node_to_tag = state.dirichlet_bcs.node_to_tag
@@ -514,6 +545,7 @@ function export_results(uh,params,state)
         vtk["uh"] = uh
         vtk["dim"] = gk.face_dim(mesh)
     end
+    nothing
 end
 
 ### From here, it is old stuff to to be deleted
diff --git a/extensions/GalerkinToolkitExamples/test/example001_tests.jl b/extensions/GalerkinToolkitExamples/test/example001_tests.jl
index 796d5742..40ef85f0 100644
--- a/extensions/GalerkinToolkitExamples/test/example001_tests.jl
+++ b/extensions/GalerkinToolkitExamples/test/example001_tests.jl
@@ -64,9 +64,10 @@ results = Example001.main(params)
 @test results[:el2] < tol
 
 params = Dict{Symbol,Any}()
-params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(30,30,30))
-results = Example001.main(params)
-@test results[:eh1] < tol
-@test results[:el2] < tol
+params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(80,80,80))
+params[:solver] = Example001.cg_amg_solver(;verbose=true)
+params[:export_vtu] = false
+Example001.main(params)
+Example001.main(params)
 
 end # module