From 7aab2256e8195e1ef525e6654c4cb7800ae4111e Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Tue, 6 Feb 2024 18:18:37 +0100 Subject: [PATCH] Performance enhancements in example001 --- .../GalerkinToolkitExamples/Project.toml | 2 + .../GalerkinToolkitExamples/src/example001.jl | 60 ++++++++++++++----- .../test/example001_tests.jl | 9 +-- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/Project.toml b/extensions/GalerkinToolkitExamples/Project.toml index c89b61df..94faabe8 100644 --- a/extensions/GalerkinToolkitExamples/Project.toml +++ b/extensions/GalerkinToolkitExamples/Project.toml @@ -6,8 +6,10 @@ version = "0.1.0" [deps] ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GalerkinToolkit = "5e3ba9c4-dd81-444d-b69a-0e7bd7bf60a4" +IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" +Preconditioners = "af69fa37-3177-5a40-98ee-561f696e4fcd" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" diff --git a/extensions/GalerkinToolkitExamples/src/example001.jl b/extensions/GalerkinToolkitExamples/src/example001.jl index f68fb715..2102326c 100644 --- a/extensions/GalerkinToolkitExamples/src/example001.jl +++ b/extensions/GalerkinToolkitExamples/src/example001.jl @@ -9,6 +9,9 @@ using WriteVTK using PartitionedArrays: JaggedArray using TimerOutputs +using Preconditioners +using IterativeSolvers: cg! + # This one implements a vanilla sequential iso-parametric Poisson solver by only # using the mesh interface. @@ -63,6 +66,7 @@ function default_params() params[:integration_degree] = 2 params[:solver] = lu_solver() params[:example_path] = joinpath(outdir,"example001") + params[:export_vtu] = true params end @@ -74,6 +78,26 @@ function lu_solver() (;setup,setup!,solve!,finalize!) end +function cg_amg_solver(;reltol=1.0e-6,verbose=false) + function setup(x,A,b) + Pl = AMGPreconditioner{SmoothedAggregation}(A) + cache = (;Pl,A) + cache + end + function solve!(x,setup,b) + (;Pl,A) = setup + fill!(x,0) + cg!(x, A, b;Pl,reltol,verbose) + end + function setup!(setup,A) + error("not implemented") + end + function finalize!(setup) + nothing + end + (;setup,solve!,setup!,finalize!) +end + function setup_dirichlet_bcs(params) mesh = params[:mesh] u = params[:u] @@ -198,7 +222,7 @@ end function assemble_system(state) timer = state.timer - @timeit timer "assemble_sytem" args = assemble_sytem_coo(state) + @timeit timer "assemble_sytem_coo" args = assemble_sytem_coo(state) I,J,V,b = args n_dofs = state.dofs.n_dofs @timeit timer "sparse" A = sparse(I,J,V,n_dofs,n_dofs) @@ -234,9 +258,9 @@ function assemble_sybmolic(state) end end - I_coo = zeros(Int,n_coo) - J_coo = zeros(Int,n_coo) - V_coo = zeros(Float64,n_coo) + I_coo = Vector{Int32}(undef,n_coo) + J_coo = Vector{Int32}(undef,n_coo) + V_coo = Vector{Float64}(undef,n_coo) b = zeros(Float64,state.dofs.n_dofs) n_coo = 0 @@ -247,12 +271,13 @@ function assemble_sybmolic(state) if !(dofs[i]>0) continue end + dofs_i = dofs[i] for j in 1:ndofs if !(dofs[j]>0) continue end n_coo += 1 - I_coo[n_coo] = dofs[i] + I_coo[n_coo] = dofs_i J_coo[n_coo] = dofs[j] end end @@ -281,8 +306,10 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state) ∇x = map(i->similar(i,size(i,2)),∇s) Aes = map(i->zeros(size(i,2),size(i,2)),∇s) fes = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) - Tx = SVector{d,Float64} + Tx = eltype(node_to_x) TJ = typeof(zero(Tx)*zero(Tx)') i_coo = 0 @@ -293,9 +320,9 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state) Ae = Aes[rid] fe = fes[rid] nl = length(nodes) - ∇se = ∇s[rid] + ∇ste = ∇st[rid] ∇xe = ∇x[rid] - se = s[rid] + ste = st[rid] we = w[rid] nq = length(we) fill!(Ae,zero(eltype(Ae))) @@ -306,8 +333,8 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state) xint = zero(Tx) for k in 1:nl x = node_to_x[nodes[k]] - ∇sqx = ∇se[iq,k] - sqx = se[iq,k] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] Jt += ∇sqx*x' xint += sqx*x end @@ -315,16 +342,17 @@ function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state) invJt = inv(Jt) dV = abs(detJt)*we[iq] for k in 1:nl - ∇xe[k] = invJt*∇se[iq,k] + ∇xe[k] = invJt*∇ste[k,iq] end for j in 1:nl + ∇xej = ∇xe[j] for i in 1:nl - Ae[i,j] += ∇xe[i]⋅∇xe[j]*dV + Ae[i,j] += ∇xe[i]⋅∇xej*dV end end fx = f(xint) for k in 1:nl - fe[k] += fx*se[iq,k]*dV + fe[k] += fx*ste[k,iq]*dV end end # Set the result in the output array @@ -445,7 +473,7 @@ function integrate_error_norms_loop(uh,state) ncells = length(cell_to_rid) ues = map(i->zeros(size(i,2)),∇s) ∇x = map(i->similar(i,size(i,2)),∇s) - Tx = SVector{d,Float64} + Tx = eltype(node_to_x) TJ = typeof(zero(Tx)*zero(Tx)') for cell in 1:ncells rid = cell_to_rid[cell] @@ -505,6 +533,9 @@ function integrate_error_norms(results,uh,state) end function export_results(uh,params,state) + if ! params[:export_vtu] + return nothing + end mesh = params[:mesh] example_path = params[:example_path] node_to_tag = state.dirichlet_bcs.node_to_tag @@ -514,6 +545,7 @@ function export_results(uh,params,state) vtk["uh"] = uh vtk["dim"] = gk.face_dim(mesh) end + nothing end ### From here, it is old stuff to to be deleted diff --git a/extensions/GalerkinToolkitExamples/test/example001_tests.jl b/extensions/GalerkinToolkitExamples/test/example001_tests.jl index 796d5742..40ef85f0 100644 --- a/extensions/GalerkinToolkitExamples/test/example001_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example001_tests.jl @@ -64,9 +64,10 @@ results = Example001.main(params) @test results[:el2] < tol params = Dict{Symbol,Any}() -params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(30,30,30)) -results = Example001.main(params) -@test results[:eh1] < tol -@test results[:el2] < tol +params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(80,80,80)) +params[:solver] = Example001.cg_amg_solver(;verbose=true) +params[:export_vtu] = false +Example001.main(params) +Example001.main(params) end # module