From 2a914ce75ec9326f74bc9666fbc504b5aa1cfff0 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Mon, 5 Feb 2024 14:29:42 +0100 Subject: [PATCH 1/9] Starting to add example003 --- extensions/GalerkinToolkitExamples/src/example003.jl | 0 extensions/GalerkinToolkitExamples/test/example003_tests.jl | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 extensions/GalerkinToolkitExamples/src/example003.jl create mode 100644 extensions/GalerkinToolkitExamples/test/example003_tests.jl diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl new file mode 100644 index 00000000..e69de29b diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl new file mode 100644 index 00000000..e69de29b From 55656fc25503d4f179b76312cdf505ebfa16cfe7 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Mon, 5 Feb 2024 16:19:33 +0100 Subject: [PATCH 2/9] Drafting example003 --- .../GalerkinToolkitExamples/src/example003.jl | 623 ++++++++++++++++++ 1 file changed, 623 insertions(+) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index e69de29b..8d635446 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -0,0 +1,623 @@ +module Example003 + +import GalerkinToolkit as gk +import ForwardDiff +using StaticArrays +using LinearAlgebra +using SparseArrays +using WriteVTK + +# This one implements a vanilla sequential iso-parametric Poisson solver +# with FE assembly on GPUs + +function main(params_in) + + # Process params + params_default = default_params() + params = add_default_params(params_in,params_default) + results = Dict{Symbol,Any}() + + # Setup main data structures + state = setup(params) + add_basic_info(results,params,state) + + # Assemble system and solve it + A,b = assemble_system(state) + x = solve_system(A,b,params) + + # Post process + uh = setup_uh(x,state) + integrate_error_norms(results,uh,state) + export_results(uh,params,state) + + results +end + +function add_default_params(params_in,params_default) + UmD = setdiff(keys(params_in),keys(params_default)) + for k in UmD + @warn "Parameter with key :$k is unused" + end + UiD = intersect(keys(params_default),keys(params_in)) + DmU = setdiff(keys(params_default),keys(params_in)) + a = [ k=>params_in[k] for k in UiD] + b = [ k=>params_default[k] for k in DmU] + Dict(vcat(a,b)) +end + +function default_params() + outdir = mkpath(joinpath(@__DIR__,"..","output")) + params = Dict{Symbol,Any}() + params[:mesh] = gk.cartesian_mesh((0,1,0,1),(10,10)) + params[:u] = (x) -> sum(x) + params[:f] = (x) -> 0.0 + params[:g] = (x) -> 0.0 + params[:dirichlet_tags] = ["boundary"] + params[:neumann_tags] = String[] + params[:integration_degree] = 2 + params[:solver] = lu_solver() + params[:example_path] = joinpath(outdir,"example003") + params +end + +function lu_solver() + setup(x,A,b) = lu(A) + setup! = lu! + solve! = ldiv! + finalize!(S) = nothing + (;setup,setup!,solve!,finalize!) +end + +function setup_dirichlet_bcs(params) + mesh = params[:mesh] + u = params[:u] + dirichlet_tags = params[:dirichlet_tags] + node_to_tag = zeros(gk.num_nodes(mesh)) + tag_to_name = dirichlet_tags + gk.classify_mesh_nodes!(node_to_tag,mesh,tag_to_name) + free_and_dirichlet_nodes = gk.partition_from_mask(i->i==0,node_to_tag) + node_to_x = gk.node_coordinates(mesh) + dirichlet_nodes = last(free_and_dirichlet_nodes) + x_dirichlet = view(node_to_x,dirichlet_nodes) + u_dirichlet = u.(x_dirichlet) + dirichlet_bcs = (;free_and_dirichlet_nodes,u_dirichlet,node_to_tag) +end + +function setup_integration(params,objects) + mesh = params[:mesh] + degree = params[:integration_degree] + D = gk.num_dims(mesh) + if objects === :cells + d = D + elseif objects === :faces + d = D-1 + else + error("") + end + # Integration + ref_cells = gk.reference_faces(mesh,d) + face_to_rid = gk.face_reference_id(mesh,d) + integration_rules = map(ref_cells) do ref_cell + gk.default_quadrature(gk.geometry(ref_cell),degree) + end + rid_to_weights = map(gk.weights,integration_rules) + rid_to_coords = map(gk.coordinates,integration_rules) + integration = (;rid_to_weights,rid_to_coords,face_to_rid,d) +end + +function setup_isomap(params,integration) + rid_to_coords = integration.rid_to_coords + face_to_rid = integration.face_to_rid + d = integration.d + mesh = params[:mesh] + ref_cells = gk.reference_faces(mesh,d) + shape_funs = map(rid_to_coords,ref_cells) do q,ref_cell + shape_vals = gk.tabulator(ref_cell)(gk.value,q) + shape_grads = gk.tabulator(ref_cell)(ForwardDiff.gradient,q) + shape_vals, shape_grads + end + rid_to_shape_vals = map(first,shape_funs) + rid_to_shape_grads = map(last,shape_funs) + face_to_nodes = gk.face_nodes(mesh,d) + node_to_coords = gk.node_coordinates(mesh) + isomap = (;face_to_nodes,node_to_coords,face_to_rid,rid_to_shape_vals,rid_to_shape_grads,d) +end + +function setup_neumann_bcs(params) + mesh = params[:mesh] + neumann_tags = params[:neumann_tags] + neum_face_to_face = Int[] + if length(neumann_tags) != 0 + D = gk.num_dims(mesh) + tag_to_groups = gk.physical_faces(mesh,D-1) + neum_face_to_face = reduce(union,map(tag->tag_to_groups[tag],neumann_tags)) + end + (;neum_face_to_face) +end + +function setup_user_funs(params) + u = params[:u] + f = params[:f] + g = params[:g] + user_funs = (;u,f,g) +end + +function setup(params) + dirichlet_bcs = setup_dirichlet_bcs(params) + neumann_bcs = setup_neumann_bcs(params) + cell_integration = setup_integration(params,:cells) + face_integration = setup_integration(params,:faces) + cell_isomap = setup_isomap(params,cell_integration) + face_isomap = setup_isomap(params,face_integration) + user_funs = setup_user_funs(params) + solver = params[:solver] + state = (;solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs) +end + +function add_basic_info(results,params,state) + node_to_x = state.cell_isomap.node_to_coords + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + cell_to_rid = state.cell_isomap.face_to_rid + nfree = length(first(free_and_dirichlet_nodes)) + nnodes = length(node_to_x) + ncells = length(cell_to_rid) + results[:nnodes] = nnodes + results[:nfree] = nfree + results[:ncells] = ncells + results +end + +function integrate_matrix(state) + + # We assume a single cell type + @assert length(state.cell_isomap.rid_to_shape_grads) == 1 + + cell_to_nodes = state.cell_isomap.face_to_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇se = state.cell_isomap.rid_to_shape_grads[1] + se = state.cell_isomap.rid_to_shape_vals[1] + we = state.cell_integration.rid_to_weights[1] + d = state.cell_integration.d + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + nfree = length(first(free_and_dirichlet_nodes)) + + cell + + n_local_nodes = size(∇se,2) + n_int_points = size(∇se,1) + n_cells = length(cell_to_nodes) + n_coo = n_cells*n_local_nodes + + Tx = SVector{d,Float64} + TJ = typeof(zero(Tx)*zero(Tx)') + + # Assemble + # This one should be good enough on the CPU + Ie = zeros(Float64,n_local_nodes*n_cells) + Je = zeros(Float64,n_local_nodes*n_cells) + for cell in 1:n_cells + nodes = cell_to_nodes[cell] + for i in 1:n_local_nodes + for j in 1:n_local_nodes + p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i + Ie[p_ij] = + Je[p_ij] = nodes[J] + end + end + end + + # Convert data to a format more suitable for gpus + # Perhaps more date is needed, and with appropriate reordering + nodese = JaggedArray(cell_to_nodes).data + cell_to_is_dirichlet = fill(false,n_cells) + for cell in 1:n_cells + nodes = cell_to_nodes[cell] + if any(node-> node_to_free_node[node]>nfree,nodes) + cell_to_is_dirichlet[cell] = true + end + end + + # TODO move to the GPU + # The code below runs on the CPU + # but it is implemented in a way close of what + # one will implement on the GPU + Ae = zeros(Float64,n_local_nodes*n_local_nodes*n_cells) + fe = zeros(Float64,n_local_nodes*n_cells) + + # Auxiliary memory + # can we get rid of this? + ∇xe = zeros(Tx,n_local_nodes*n_cells) + for cell in 1:n_cells + for iq in 1:n_int_points + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:n_local_nodes + p_k = (cell-1)*n_local_nodes + k + node = nodese[p_k] + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:n_local_nodes + p_k = (cell-1)*n_local_nodes + k + ∇xe[p_k] = invJt*∇se[iq,k] + end + # Matrix + for j in 1:n_local_nodes + p_j = (cell-1)*n_local_nodes + j + for i in 1:n_local_nodes + p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i + p_i = (cell-1)*n_local_nodes + i + V_coo[p_ij] += ∇xe[p_i]⋅∇xe[p_j]*dV + end + end + # Vector + fx = f(xint) + for k in 1:n_local_nodes + p_k = (cell-1)*n_local_nodes + k + b_coo[p_k] += fx*se[iq,k]*dV + end + end + end + + # Add boundary conditions to fe + # We provably don't want to merge this and the previous one + # since this will lead to branching but it will be shorter + for cell in 1:n_cells + if !cell_to_is_dirichlet[cell] + continue + end + for i in 1:n_local_nodes + p_i = (cell-1)*n_local_nodes + i + ni = nodese[p_i] + gi = node_to_free_node[ni] + if gi > nfree # Not really needed + continue + end + for j in 1:n_local_nodes + p_j = (cell-1)*n_local_nodes + j + nj = nodese[p_j] + gj = node_to_free_node[nj] + if gj <= nfree + continue + end + p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i + Aij = Ae[p_ij] + uj = u_dirichlet[gj-nfree] + be[p_i] -= Aij*uj + end + end + end + + + + + + + + + + +end + +function assemble_system_loop(state) + + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + f = state.user_funs.f + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + + # Count coo entries + n_coo = 0 + ncells = length(cell_to_nodes) + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + nfree = length(first(free_and_dirichlet_nodes)) + for cell in 1:ncells + nodes = cell_to_nodes[cell] + for nj in nodes + if node_to_free_node[nj] > nfree + continue + end + for ni in nodes + if node_to_free_node[ni] > nfree + continue + end + n_coo += 1 + end + end + end + + # Allocate coo values + I_coo = zeros(Int32,n_coo) + J_coo = zeros(Int32,n_coo) + V_coo = zeros(Float64,n_coo) + b = zeros(nfree) + + # Allocate auxiliary buffers + ∇x = map(i->similar(i,size(i,2)),∇s) + Aes = map(i->zeros(size(i,2),size(i,2)),∇s) + fes = map(i->zeros(size(i,2)),∇s) + ues = map(i->zeros(size(i,2)),∇s) + + Tx = SVector{d,Float64} + TJ = typeof(zero(Tx)*zero(Tx)') + + # Fill coo values + i_coo = 0 + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + Ae = Aes[rid] + ue = ues[rid] + fe = fes[rid] + nl = length(nodes) + ∇se = ∇s[rid] + ∇xe = ∇x[rid] + se = s[rid] + we = w[rid] + nq = length(we) + fill!(Ae,zero(eltype(Ae))) + fill!(fe,zero(eltype(Ae))) + fill!(ue,zero(eltype(Ae))) + for k in 1:nl + nk = nodes[k] + gk = node_to_free_node[nk] + if gk <= nfree + continue + end + ue[k] = u_dirichlet[gk-nfree] + end + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + ∇xe[k] = invJt*∇se[iq,k] + end + for j in 1:nl + for i in 1:nl + Ae[i,j] += ∇xe[i]⋅∇xe[j]*dV + end + end + fx = f(xint) + for k in 1:nl + fe[k] += fx*se[iq,k]*dV + end + end + for i in 1:nl + for j in 1:nl + fe[i] -= Ae[i,j]*ue[j] + end + end + for i in 1:nl + ni = nodes[i] + gi = node_to_free_node[ni] + if gi > nfree + continue + end + b[gi] += fe[i] + end + for j in 1:nl + nj = nodes[j] + gj = node_to_free_node[nj] + if gj > nfree + continue + end + for i in 1:nl + ni = nodes[i] + gi = node_to_free_node[ni] + if gi > nfree + continue + end + i_coo += 1 + I_coo[i_coo] = gi + J_coo[i_coo] = gj + V_coo[i_coo] = Ae[i,j] + end + end + end + add_neumann_bcs!(b,state) + I_coo,J_coo,V_coo,b +end + +function add_neumann_bcs!(b,state) + + neum_face_to_face = state.neumann_bcs.neum_face_to_face + if length(neum_face_to_face) == 0 + return nothing + end + face_to_rid = state.face_isomap.face_to_rid + face_to_nodes = state.face_isomap.face_to_nodes + ∇s_f = state.face_isomap.rid_to_shape_grads + s_f = state.face_isomap.rid_to_shape_vals + node_to_x = state.face_isomap.node_to_coords + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + g = state.user_funs.g + w_f = state.face_integration.rid_to_weights + d = state.cell_integration.d + + fes = map(i->zeros(size(i,2)),s_f) + Tx = SVector{d,Float64} + Ts = typeof(zero(SVector{d-1,Float64})*zero(Tx)') + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + nfree = length(first(free_and_dirichlet_nodes)) + + for face in neum_face_to_face + rid = face_to_rid[face] + nodes = face_to_nodes[face] + ∇se = ∇s_f[rid] + se = s_f[rid] + we = w_f[rid] + nq = length(we) + fe = fes[rid] + fill!(fe,zero(eltype(fe))) + nl = length(nodes) + for iq in 1:nq + Jt = zero(Ts) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + J = transpose(Jt) + dS = sqrt(det(Jt*J))*we[iq] + gx = g(xint) + for k in 1:nl + fe[k] += gx*se[iq,k]*dS + end + end + for i in 1:nl + ni = nodes[i] + gi = node_to_free_node[ni] + if gi > nfree + continue + end + b[gi] += fe[i] + end + end + + nothing +end + +function assemble_system(state) + I,J,V,b = assemble_system_loop(state) + nfree = length(b) + A = sparse(I,J,V,nfree,nfree) + A,b +end + +function solve_system(A,b,params) + solver = params[:solver] + x = similar(b,axes(A,2)) + setup = solver.setup(x,A,b) + solver.solve!(x,setup,b) + solver.finalize!(setup) + x +end + +function setup_uh(x,state) + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + u_free = x + u_dirichlet = state.dirichlet_bcs.u_dirichlet + nnodes = length(node_to_x) + uh = zeros(nnodes) + uh[first(free_and_dirichlet_nodes)] = u_free + uh[last(free_and_dirichlet_nodes)] = u_dirichlet + uh +end + +function integrate_error_norms_loop(uh,state) + + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + u = state.user_funs.u + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + + eh1 = 0.0 + el2 = 0.0 + ncells = length(cell_to_rid) + ues = map(i->zeros(size(i,2)),∇s) + ∇x = map(i->similar(i,size(i,2)),∇s) + Tx = SVector{d,Float64} + TJ = typeof(zero(Tx)*zero(Tx)') + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + ue = ues[rid] + nl = length(nodes) + ∇se = ∇s[rid] + ∇xe = ∇x[rid] + se = s[rid] + we = w[rid] + nq = length(we) + for k in 1:nl + nk = nodes[k] + ue[k] = uh[nk] + end + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + ∇xe[k] = invJt*∇se[iq,k] + end + ux = u(xint) + ∇ux = ForwardDiff.gradient(u,xint) + ∇uhx = zero(∇ux) + uhx = zero(ux) + for k in 1:nl + uek = ue[k] + ∇uhx += uek*∇xe[k] + uhx += uek*se[iq,k] + end + ∇ex = ∇ux - ∇uhx + ex = ux - uhx + eh1 += (∇ex⋅∇ex + ex*ex)*dV + el2 += ex*ex*dV + end + end + eh1, el2 +end + +function integrate_error_norms(results,uh,state) + eh1², el2² = integrate_error_norms_loop(uh,state) + eh1 = sqrt(eh1²) + el2 = sqrt(el2²) + results[:eh1] = eh1 + results[:el2] = el2 + results +end + +function export_results(uh,params,state) + mesh = params[:mesh] + example_path = params[:example_path] + node_to_tag = state.dirichlet_bcs.node_to_tag + vtk_grid(example_path,gk.vtk_args(mesh)...) do vtk + gk.vtk_physical_faces!(vtk,mesh) + vtk["tag"] = node_to_tag + vtk["uh"] = uh + vtk["dim"] = gk.face_dim(mesh) + end +end + +end # module From d700833e70692646c24d2b4d5a446179aa2ece93 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Thu, 8 Feb 2024 15:39:26 +0100 Subject: [PATCH 3/9] First version of p-Laplacian working --- .../GalerkinToolkitExamples/Project.toml | 2 + .../src/GalerkinToolkitExamples.jl | 1 + .../GalerkinToolkitExamples/src/example003.jl | 554 +++++++++++------- .../test/example003_tests.jl | 27 + 4 files changed, 361 insertions(+), 223 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/Project.toml b/extensions/GalerkinToolkitExamples/Project.toml index d295d8ba..b4757349 100644 --- a/extensions/GalerkinToolkitExamples/Project.toml +++ b/extensions/GalerkinToolkitExamples/Project.toml @@ -9,9 +9,11 @@ GalerkinToolkit = "5e3ba9c4-dd81-444d-b69a-0e7bd7bf60a4" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" PetscCall = "1194c000-87c4-4102-b4a0-a6217ec4849e" Preconditioners = "af69fa37-3177-5a40-98ee-561f696e4fcd" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" diff --git a/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl b/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl index 03dd517e..aa1d3e99 100644 --- a/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl +++ b/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl @@ -2,5 +2,6 @@ module GalerkinToolkitExamples include("example001.jl") include("example002.jl") +include("example003.jl") end # module diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index 8d635446..a3f6f34e 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -6,29 +6,38 @@ using StaticArrays using LinearAlgebra using SparseArrays using WriteVTK +using PartitionedArrays: JaggedArray, nzindex +using TimerOutputs +using NLsolve +using Random -# This one implements a vanilla sequential iso-parametric Poisson solver -# with FE assembly on GPUs +using Preconditioners +using IterativeSolvers: cg! + +# This one implements a vanilla sequential iso-parametric p-Laplacian solver by only +# using the mesh interface. function main(params_in) + timer = TimerOutput() + # Process params params_default = default_params() params = add_default_params(params_in,params_default) results = Dict{Symbol,Any}() # Setup main data structures - state = setup(params) + @timeit timer "setup" state = setup(params,timer) add_basic_info(results,params,state) - # Assemble system and solve it - A,b = assemble_system(state) - x = solve_system(A,b,params) + @timeit timer "solve_problem" x = solve_problem(params,state) # Post process - uh = setup_uh(x,state) - integrate_error_norms(results,uh,state) - export_results(uh,params,state) + @timeit timer "setup_uh" uh = setup_uh(x,state) + @timeit timer "integrate_error_norms" integrate_error_norms(results,uh,state) + @timeit timer "export_results" export_results(uh,params,state) + + display(timer) results end @@ -55,8 +64,10 @@ function default_params() params[:dirichlet_tags] = ["boundary"] params[:neumann_tags] = String[] params[:integration_degree] = 2 - params[:solver] = lu_solver() + params[:p] = 2 + params[:solver] = nlsolve_solver() params[:example_path] = joinpath(outdir,"example003") + params[:export_vtu] = true params end @@ -68,6 +79,60 @@ function lu_solver() (;setup,setup!,solve!,finalize!) end +function cg_amg_solver(;reltol=1.0e-6,verbose=false) + function setup(x,A,b) + Pl = AMGPreconditioner{SmoothedAggregation}(A) + cache = (;Pl,A) + cache + end + function solve!(x,setup,b) + (;Pl,A) = setup + fill!(x,0) + cg!(x, A, b;Pl,reltol,verbose) + end + function setup!(setup,A) + error("not implemented") + end + function finalize!(setup) + nothing + end + (;setup,solve!,setup!,finalize!) +end + +function nlsolve_solver(;linear_solver=lu_solver(),options...) + function setup(x0,nlp) + r0,J0,cache = nlp.linearize(x0) + dx = similar(r0,axes(J0,2)) # TODO is there any way of reusing this in the nonlinear solve? + ls_setup = linear_solver.setup(dx,J0,r0) + function linsolve(x,A,b) + # TODO we dont need to re-setup for the first + # linear solve. + # This can be avoided with a Ref{Bool} shared + # between this function and j! + linear_solver.setup!(ls_setup,A) + linear_solver.solve!(x,ls_setup,b) + x + end + f!(r,x) = nlp.residual!(r,x,cache) + j!(J,x) = nlp.jacobian!(J,x,cache) + df = OnceDifferentiable(f!,j!,x0,r0,J0) + (;df,linsolve,J0,cache,ls_setup,linear_solver) + end + function solve!(x,setup) + (;df,linsolve) = setup + result = nlsolve(df,x;linsolve,options...) + x .= result.zero + nothing + end + function setup!(setup,x0,nlp) + error("todo") + end + function finalize!(setup) + setup.linear_solver.finalize!(setup) + end + (;setup,solve!,setup!,finalize!) +end + function setup_dirichlet_bcs(params) mesh = params[:mesh] u = params[:u] @@ -118,7 +183,7 @@ function setup_isomap(params,integration) end rid_to_shape_vals = map(first,shape_funs) rid_to_shape_grads = map(last,shape_funs) - face_to_nodes = gk.face_nodes(mesh,d) + face_to_nodes = JaggedArray(gk.face_nodes(mesh,d)) node_to_coords = gk.node_coordinates(mesh) isomap = (;face_to_nodes,node_to_coords,face_to_rid,rid_to_shape_vals,rid_to_shape_grads,d) end @@ -142,16 +207,40 @@ function setup_user_funs(params) user_funs = (;u,f,g) end -function setup(params) - dirichlet_bcs = setup_dirichlet_bcs(params) - neumann_bcs = setup_neumann_bcs(params) - cell_integration = setup_integration(params,:cells) - face_integration = setup_integration(params,:faces) - cell_isomap = setup_isomap(params,cell_integration) - face_isomap = setup_isomap(params,face_integration) - user_funs = setup_user_funs(params) +function setup_dofs(params,dirichlet_bcs,cell_isomap,face_isomap) + free_and_dirichlet_nodes = dirichlet_bcs.free_and_dirichlet_nodes + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + n_free = length(first(free_and_dirichlet_nodes)) + n_dofs = n_free + cell_to_nodes = cell_isomap.face_to_nodes + face_to_nodes = face_isomap.face_to_nodes + function node_to_dof(node) + free_node = node_to_free_node[node] + if free_node <= n_free + return Int(free_node) + end + Int(-(free_node-n_free)) + end + cell_to_dofs_data = node_to_dof.(cell_to_nodes.data) + face_to_dofs_data = node_to_dof.(face_to_nodes.data) + cell_to_dofs = JaggedArray(cell_to_dofs_data,cell_to_nodes.ptrs) + face_to_dofs = JaggedArray(face_to_dofs_data,face_to_nodes.ptrs) + dofs = (;cell_to_dofs,face_to_dofs,n_dofs) + dofs +end + +function setup(params,timer) + @timeit timer "dirichlet_bcs" dirichlet_bcs = setup_dirichlet_bcs(params) + @timeit timer "neumann_bcs" neumann_bcs = setup_neumann_bcs(params) + @timeit timer "cell_integration" cell_integration = setup_integration(params,:cells) + @timeit timer "face_integration" face_integration = setup_integration(params,:faces) + @timeit timer "cell_isomap" cell_isomap = setup_isomap(params,cell_integration) + @timeit timer "face_isomap" face_isomap = setup_isomap(params,face_integration) + @timeit timer "dofs" dofs = setup_dofs(params,dirichlet_bcs,cell_isomap,face_isomap) + @timeit timer "user_funs" user_funs = setup_user_funs(params) solver = params[:solver] - state = (;solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs) + p = params[:p] + state = (;p,timer,solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs,dofs) end function add_basic_info(results,params,state) @@ -167,147 +256,119 @@ function add_basic_info(results,params,state) results end -function integrate_matrix(state) - - # We assume a single cell type - @assert length(state.cell_isomap.rid_to_shape_grads) == 1 - - cell_to_nodes = state.cell_isomap.face_to_nodes - node_to_x = state.cell_isomap.node_to_coords - ∇se = state.cell_isomap.rid_to_shape_grads[1] - se = state.cell_isomap.rid_to_shape_vals[1] - we = state.cell_integration.rid_to_weights[1] - d = state.cell_integration.d - free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes - node_to_free_node = gk.permutation(free_and_dirichlet_nodes) - nfree = length(first(free_and_dirichlet_nodes)) - - cell - - n_local_nodes = size(∇se,2) - n_int_points = size(∇se,1) - n_cells = length(cell_to_nodes) - n_coo = n_cells*n_local_nodes - - Tx = SVector{d,Float64} - TJ = typeof(zero(Tx)*zero(Tx)') - - # Assemble - # This one should be good enough on the CPU - Ie = zeros(Float64,n_local_nodes*n_cells) - Je = zeros(Float64,n_local_nodes*n_cells) - for cell in 1:n_cells - nodes = cell_to_nodes[cell] - for i in 1:n_local_nodes - for j in 1:n_local_nodes - p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i - Ie[p_ij] = - Je[p_ij] = nodes[J] - end - end +function nonlinear_problem(state) + function initial() + n = state.dofs.n_dofs + Random.seed!(1) + rand(Float64,n) end - - # Convert data to a format more suitable for gpus - # Perhaps more date is needed, and with appropriate reordering - nodese = JaggedArray(cell_to_nodes).data - cell_to_is_dirichlet = fill(false,n_cells) - for cell in 1:n_cells - nodes = cell_to_nodes[cell] - if any(node-> node_to_free_node[node]>nfree,nodes) - cell_to_is_dirichlet[cell] = true - end + function linearize(u_dofs) + r,J,V,K = assemble_sybmolic(state) + cache = (V,K) + residual_cells!(r,u_dofs,state) + residual_faces!(r,u_dofs,state) + jacobian_cells!(V,u_dofs,state) + setcoofast!(J,V,K) + r,J,cache + end + function residual!(r,u_dofs,cache) + fill!(r,0) + residual_cells!(r,u_dofs,state) + residual_faces!(r,u_dofs,state) + r + end + function jacobian!(J,u_dofs,cache) + (V,K) = cache + LinearAlgebra.fillstored!(J,0) + jacobian_cells!(V,u_dofs,state) + setcoofast!(J,V,K) + J end + (;initial,linearize,residual!,jacobian!) +end - # TODO move to the GPU - # The code below runs on the CPU - # but it is implemented in a way close of what - # one will implement on the GPU - Ae = zeros(Float64,n_local_nodes*n_local_nodes*n_cells) - fe = zeros(Float64,n_local_nodes*n_cells) - - # Auxiliary memory - # can we get rid of this? - ∇xe = zeros(Tx,n_local_nodes*n_cells) - for cell in 1:n_cells - for iq in 1:n_int_points - Jt = zero(TJ) - xint = zero(Tx) - for k in 1:n_local_nodes - p_k = (cell-1)*n_local_nodes + k - node = nodese[p_k] - x = node_to_x[nodes[k]] - ∇sqx = ∇se[iq,k] - sqx = se[iq,k] - Jt += ∇sqx*x' - xint += sqx*x - end - detJt = det(Jt) - invJt = inv(Jt) - dV = abs(detJt)*we[iq] - for k in 1:n_local_nodes - p_k = (cell-1)*n_local_nodes + k - ∇xe[p_k] = invJt*∇se[iq,k] +function assemble_sybmolic(state) + cell_to_dofs = state.dofs.cell_to_dofs + n_dofs = state.dofs.n_dofs + + n_coo = 0 + ncells = length(cell_to_dofs) + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + for i in 1:ndofs + if !(dofs[i]>0) + continue end - # Matrix - for j in 1:n_local_nodes - p_j = (cell-1)*n_local_nodes + j - for i in 1:n_local_nodes - p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i - p_i = (cell-1)*n_local_nodes + i - V_coo[p_ij] += ∇xe[p_i]⋅∇xe[p_j]*dV + for j in 1:ndofs + if !(dofs[j]>0) + continue end - end - # Vector - fx = f(xint) - for k in 1:n_local_nodes - p_k = (cell-1)*n_local_nodes + k - b_coo[p_k] += fx*se[iq,k]*dV + n_coo += 1 end end end - # Add boundary conditions to fe - # We provably don't want to merge this and the previous one - # since this will lead to branching but it will be shorter - for cell in 1:n_cells - if !cell_to_is_dirichlet[cell] - continue - end - for i in 1:n_local_nodes - p_i = (cell-1)*n_local_nodes + i - ni = nodese[p_i] - gi = node_to_free_node[ni] - if gi > nfree # Not really needed + I_coo = Vector{Int32}(undef,n_coo) + J_coo = Vector{Int32}(undef,n_coo) + V_coo = Vector{Float64}(undef,n_coo) + r = zeros(Float64,state.dofs.n_dofs) + + n_coo = 0 + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + for i in 1:ndofs + if !(dofs[i]>0) continue end - for j in 1:n_local_nodes - p_j = (cell-1)*n_local_nodes + j - nj = nodese[p_j] - gj = node_to_free_node[nj] - if gj <= nfree + dofs_i = dofs[i] + for j in 1:ndofs + if !(dofs[j]>0) continue end - p_ij = (cell-1)*n_local_nodes*n_local_nodes + (j-1)*n_local_nodes + i - Aij = Ae[p_ij] - uj = u_dirichlet[gj-nfree] - be[p_i] -= Aij*uj + n_coo += 1 + I_coo[n_coo] = dofs_i + J_coo[n_coo] = dofs[j] end end end + J = sparse(I_coo,J_coo,V_coo,n_dofs,n_dofs) + K = precompute_nzindex(J,I_coo,J_coo) + r,J,V_coo,K +end +function precompute_nzindex(A,I,J) + K = zeros(Int32,length(I)) + for (p,(i,j)) in enumerate(zip(I,J)) + K[p] = nzindex(A,i,j) + end + K +end +function setcoofast!(A,V,K) + LinearAlgebra.fillstored!(A,0) + A_nz = nonzeros(A) + for (k,v) in zip(K,V) + A_nz[k] += v + end + A +end +@inline function p_laplace_residual(∇u,∇dv,dv,f,p) + ∇dv⋅((norm(∇u)^(p-2))*∇u) - f*dv +end - - - - - +@inline function p_laplace_jacobian(∇u,∇du,∇dv,p) + pm2 = p-2 + pm4 = p-4 + ∇dv⋅((norm(∇u)^pm2)*∇du) + ∇dv⋅(pm2*(norm(∇u)^pm4)*(∇u⋅∇du)*∇u) end -function assemble_system_loop(state) +function residual_cells!(r,u_dofs,state) + cell_to_dofs = state.dofs.cell_to_dofs cell_to_nodes = state.cell_isomap.face_to_nodes cell_to_rid = state.cell_isomap.face_to_rid free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes @@ -318,141 +379,194 @@ function assemble_system_loop(state) f = state.user_funs.f w = state.cell_integration.rid_to_weights d = state.cell_integration.d - - # Count coo entries - n_coo = 0 + u_dirichlet = state.dirichlet_bcs.u_dirichlet ncells = length(cell_to_nodes) - node_to_free_node = gk.permutation(free_and_dirichlet_nodes) - nfree = length(first(free_and_dirichlet_nodes)) + p = state.p + + # Allocate auxiliary buffers + ∇x = map(i->similar(i,size(i,2)),∇s) + Aes = map(i->zeros(size(i,2),size(i,2)),∇s) + fes = map(i->zeros(size(i,2)),∇s) + ues = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) + + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + T = eltype(Tx) + + i_coo = 0 for cell in 1:ncells + rid = cell_to_rid[cell] nodes = cell_to_nodes[cell] - for nj in nodes - if node_to_free_node[nj] > nfree - continue + dofs = cell_to_dofs[cell] + Ae = Aes[rid] + fe = fes[rid] + ue = ues[rid] + nl = length(nodes) + ∇ste = ∇st[rid] + ∇xe = ∇x[rid] + ste = st[rid] + we = w[rid] + nq = length(we) + fill!(Ae,zero(eltype(Ae))) + fill!(fe,zero(eltype(Ae))) + # Integrate + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] + Jt += ∇sqx*x' + xint += sqx*x end - for ni in nodes - if node_to_free_node[ni] > nfree + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + dof_k = dofs[k] + if dof_k < 1 + uk = u_dirichlet[-dof_k] + ue[k] = uk continue end - n_coo += 1 + ue[k] = u_dofs[dof_k] + end + ∇u = zero(Tx) + for k in 1:nl + uek = ue[k] + ∇xek = invJt*∇ste[k,iq] + ∇u += ∇xek*uek + ∇xe[k] = ∇xek + end + fx = f(xint) + for k in 1:nl + dv = ste[k,iq] + ∇dv = ∇xe[k] + fe[k] += ( p_laplace_residual(∇u,∇dv,dv,fx,p) )*dV + end + end + for i in 1:nl + if !(dofs[i]>0) + continue end + r[dofs[i]] += fe[i] end end +end + +function jacobian_cells!(V_coo,u_dofs,state) - # Allocate coo values - I_coo = zeros(Int32,n_coo) - J_coo = zeros(Int32,n_coo) - V_coo = zeros(Float64,n_coo) - b = zeros(nfree) + cell_to_dofs = state.dofs.cell_to_dofs + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + f = state.user_funs.f + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + u_dirichlet = state.dirichlet_bcs.u_dirichlet + ncells = length(cell_to_nodes) + p = state.p # Allocate auxiliary buffers ∇x = map(i->similar(i,size(i,2)),∇s) Aes = map(i->zeros(size(i,2),size(i,2)),∇s) fes = map(i->zeros(size(i,2)),∇s) ues = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) - Tx = SVector{d,Float64} + Tx = eltype(node_to_x) TJ = typeof(zero(Tx)*zero(Tx)') + T = eltype(Tx) - # Fill coo values i_coo = 0 for cell in 1:ncells rid = cell_to_rid[cell] nodes = cell_to_nodes[cell] + dofs = cell_to_dofs[cell] Ae = Aes[rid] - ue = ues[rid] fe = fes[rid] + ue = ues[rid] nl = length(nodes) - ∇se = ∇s[rid] + ∇ste = ∇st[rid] ∇xe = ∇x[rid] - se = s[rid] + ste = st[rid] we = w[rid] nq = length(we) fill!(Ae,zero(eltype(Ae))) fill!(fe,zero(eltype(Ae))) - fill!(ue,zero(eltype(Ae))) - for k in 1:nl - nk = nodes[k] - gk = node_to_free_node[nk] - if gk <= nfree - continue - end - ue[k] = u_dirichlet[gk-nfree] - end + # Integrate for iq in 1:nq Jt = zero(TJ) - xint = zero(Tx) for k in 1:nl x = node_to_x[nodes[k]] - ∇sqx = ∇se[iq,k] - sqx = se[iq,k] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] Jt += ∇sqx*x' - xint += sqx*x end detJt = det(Jt) invJt = inv(Jt) dV = abs(detJt)*we[iq] for k in 1:nl - ∇xe[k] = invJt*∇se[iq,k] - end - for j in 1:nl - for i in 1:nl - Ae[i,j] += ∇xe[i]⋅∇xe[j]*dV + dof_k = dofs[k] + if dof_k < 1 + uk = u_dirichlet[-dof_k] + ue[k] = uk + continue end + ue[k] = u_dofs[dof_k] end - fx = f(xint) + ∇u = zero(Tx) for k in 1:nl - fe[k] += fx*se[iq,k]*dV + uek = ue[k] + ∇xek = invJt*∇ste[k,iq] + ∇u += ∇xek*uek + ∇xe[k] = ∇xek end - end - for i in 1:nl for j in 1:nl - fe[i] -= Ae[i,j]*ue[j] + ∇du = ∇xe[j] + for i in 1:nl + ∇dv = ∇xe[i] + Ae[i,j] += ( p_laplace_jacobian(∇u,∇du,∇dv,p) )*dV + end end end + # Set the result in the output array for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree + if !(dofs[i]>0) continue end - b[gi] += fe[i] - end - for j in 1:nl - nj = nodes[j] - gj = node_to_free_node[nj] - if gj > nfree - continue - end - for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree + for j in 1:nl + if !(dofs[j]>0) continue end i_coo += 1 - I_coo[i_coo] = gi - J_coo[i_coo] = gj V_coo[i_coo] = Ae[i,j] end end end - add_neumann_bcs!(b,state) - I_coo,J_coo,V_coo,b end -function add_neumann_bcs!(b,state) +function residual_faces!(r,u_dofs,state) neum_face_to_face = state.neumann_bcs.neum_face_to_face if length(neum_face_to_face) == 0 return nothing end + face_to_rid = state.face_isomap.face_to_rid + face_to_dofs = state.dofs.face_to_dofs face_to_nodes = state.face_isomap.face_to_nodes ∇s_f = state.face_isomap.rid_to_shape_grads s_f = state.face_isomap.rid_to_shape_vals node_to_x = state.face_isomap.node_to_coords - free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes g = state.user_funs.g w_f = state.face_integration.rid_to_weights d = state.cell_integration.d @@ -460,12 +574,11 @@ function add_neumann_bcs!(b,state) fes = map(i->zeros(size(i,2)),s_f) Tx = SVector{d,Float64} Ts = typeof(zero(SVector{d-1,Float64})*zero(Tx)') - node_to_free_node = gk.permutation(free_and_dirichlet_nodes) - nfree = length(first(free_and_dirichlet_nodes)) for face in neum_face_to_face rid = face_to_rid[face] nodes = face_to_nodes[face] + dofs = face_to_dofs[face] ∇se = ∇s_f[rid] se = s_f[rid] we = w_f[rid] @@ -491,31 +604,22 @@ function add_neumann_bcs!(b,state) end end for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree + if !(dofs[i]>0) continue end - b[gi] += fe[i] + r[dofs[i]] -= fe[i] end end - - nothing end -function assemble_system(state) - I,J,V,b = assemble_system_loop(state) - nfree = length(b) - A = sparse(I,J,V,nfree,nfree) - A,b -end - -function solve_system(A,b,params) +function solve_problem(params,state) + timer = state.timer + problem = nonlinear_problem(state) solver = params[:solver] - x = similar(b,axes(A,2)) - setup = solver.setup(x,A,b) - solver.solve!(x,setup,b) - solver.finalize!(setup) + x = problem.initial() + @timeit timer "solver.setup" setup = solver.setup(x,problem) + @timeit timer "solver.solve!" solver.solve!(x,setup) + @timeit timer "solver.finalize!" solver.finalize!(setup) x end @@ -549,7 +653,7 @@ function integrate_error_norms_loop(uh,state) ncells = length(cell_to_rid) ues = map(i->zeros(size(i,2)),∇s) ∇x = map(i->similar(i,size(i,2)),∇s) - Tx = SVector{d,Float64} + Tx = eltype(node_to_x) TJ = typeof(zero(Tx)*zero(Tx)') for cell in 1:ncells rid = cell_to_rid[cell] @@ -609,6 +713,9 @@ function integrate_error_norms(results,uh,state) end function export_results(uh,params,state) + if ! params[:export_vtu] + return nothing + end mesh = params[:mesh] example_path = params[:example_path] node_to_tag = state.dirichlet_bcs.node_to_tag @@ -618,6 +725,7 @@ function export_results(uh,params,state) vtk["uh"] = uh vtk["dim"] = gk.face_dim(mesh) end + nothing end end # module diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl index e69de29b..1d6e0cf6 100644 --- a/extensions/GalerkinToolkitExamples/test/example003_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example003_tests.jl @@ -0,0 +1,27 @@ +module Example003Tests + +import GalerkinToolkit as gk +using GalerkinToolkitExamples: Example003 +using Test + +tol = 1.0e-10 + +params = Dict{Symbol,Any}() +params[:mesh] = gk.cartesian_mesh((0,1,0,1),(3,3)) +results = Example003.main(params) +@test results[:eh1] < tol +@test results[:el2] < tol + +params = Dict{Symbol,Any}() +params[:mesh] = gk.cartesian_mesh((0,3,0,2),(5,10),complexify=false) +params[:dirichlet_tags] = ["1-face-1","1-face-3","1-face-4"] +params[:neumann_tags] = ["1-face-2"] +params[:u] = (x) -> sum(x) +params[:f] = (x) -> 0.0 +params[:g] = (x) -> 1.0 +results = Example003.main(params) +@test results[:eh1] < tol +@test results[:el2] < tol +@test results[:ncells] == 10*5 + +end # module From db11f0c950b3302582315db7933567811ff37456 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Thu, 8 Feb 2024 17:20:09 +0100 Subject: [PATCH 4/9] More enhancements in example003 --- .../GalerkinToolkitExamples/src/example001.jl | 10 +-- .../GalerkinToolkitExamples/src/example003.jl | 69 ++++++------------- .../test/example003_tests.jl | 21 +++++- 3 files changed, 48 insertions(+), 52 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/src/example001.jl b/extensions/GalerkinToolkitExamples/src/example001.jl index f58cb7dc..80ad1a6a 100644 --- a/extensions/GalerkinToolkitExamples/src/example001.jl +++ b/extensions/GalerkinToolkitExamples/src/example001.jl @@ -78,19 +78,21 @@ function lu_solver() (;setup,setup!,solve!,finalize!) end -function cg_amg_solver(;reltol=1.0e-6,verbose=false) +function cg_amg_solver(;reltol=1.0e-6,verbose=false,timer=TimerOutput()) function setup(x,A,b) Pl = AMGPreconditioner{SmoothedAggregation}(A) - cache = (;Pl,A) + cache = Ref((;Pl,A)) cache end function solve!(x,setup,b) - (;Pl,A) = setup + (;Pl,A) = setup[] fill!(x,0) cg!(x, A, b;Pl,reltol,verbose) end function setup!(setup,A) - error("not implemented") + Pl = AMGPreconditioner{SmoothedAggregation}(A) + setup[] = (;Pl,A) + setup end function finalize!(setup) nothing diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index a3f6f34e..efee5e0c 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -10,6 +10,7 @@ using PartitionedArrays: JaggedArray, nzindex using TimerOutputs using NLsolve using Random +using GalerkinToolkitExamples: Example001 using Preconditioners using IterativeSolvers: cg! @@ -19,15 +20,15 @@ using IterativeSolvers: cg! function main(params_in) - timer = TimerOutput() # Process params params_default = default_params() params = add_default_params(params_in,params_default) results = Dict{Symbol,Any}() + timer = params[:timer] # Setup main data structures - @timeit timer "setup" state = setup(params,timer) + @timeit timer "setup" state = setup(params) add_basic_info(results,params,state) @timeit timer "solve_problem" x = solve_problem(params,state) @@ -37,7 +38,7 @@ function main(params_in) @timeit timer "integrate_error_norms" integrate_error_norms(results,uh,state) @timeit timer "export_results" export_results(uh,params,state) - display(timer) + print_timer(timer,allocations=false) results end @@ -68,49 +69,22 @@ function default_params() params[:solver] = nlsolve_solver() params[:example_path] = joinpath(outdir,"example003") params[:export_vtu] = true + params[:timer] = TimerOutput() params end -function lu_solver() - setup(x,A,b) = lu(A) - setup! = lu! - solve! = ldiv! - finalize!(S) = nothing - (;setup,setup!,solve!,finalize!) -end - -function cg_amg_solver(;reltol=1.0e-6,verbose=false) - function setup(x,A,b) - Pl = AMGPreconditioner{SmoothedAggregation}(A) - cache = (;Pl,A) - cache - end - function solve!(x,setup,b) - (;Pl,A) = setup - fill!(x,0) - cg!(x, A, b;Pl,reltol,verbose) - end - function setup!(setup,A) - error("not implemented") - end - function finalize!(setup) - nothing - end - (;setup,solve!,setup!,finalize!) -end - -function nlsolve_solver(;linear_solver=lu_solver(),options...) +function nlsolve_solver(;linear_solver=Example001.lu_solver(),timer=TimerOutput(),options...) function setup(x0,nlp) - r0,J0,cache = nlp.linearize(x0) + @timeit timer "linearize" r0,J0,cache = nlp.linearize(x0) dx = similar(r0,axes(J0,2)) # TODO is there any way of reusing this in the nonlinear solve? - ls_setup = linear_solver.setup(dx,J0,r0) + @timeit timer "linear_solver_setup" ls_setup = linear_solver.setup(dx,J0,r0) function linsolve(x,A,b) # TODO we dont need to re-setup for the first # linear solve. # This can be avoided with a Ref{Bool} shared # between this function and j! - linear_solver.setup!(ls_setup,A) - linear_solver.solve!(x,ls_setup,b) + @timeit timer "linear_solver_setup!" linear_solver.setup!(ls_setup,A) + @timeit timer "linear_solver_solve!" linear_solver.solve!(x,ls_setup,b) x end f!(r,x) = nlp.residual!(r,x,cache) @@ -229,7 +203,8 @@ function setup_dofs(params,dirichlet_bcs,cell_isomap,face_isomap) dofs end -function setup(params,timer) +function setup(params) + timer = params[:timer] @timeit timer "dirichlet_bcs" dirichlet_bcs = setup_dirichlet_bcs(params) @timeit timer "neumann_bcs" neumann_bcs = setup_neumann_bcs(params) @timeit timer "cell_integration" cell_integration = setup_integration(params,:cells) @@ -257,31 +232,31 @@ function add_basic_info(results,params,state) end function nonlinear_problem(state) + timer = state.timer function initial() n = state.dofs.n_dofs Random.seed!(1) rand(Float64,n) end function linearize(u_dofs) - r,J,V,K = assemble_sybmolic(state) + @timeit timer "assemble_sybmolic" r,J,V,K = assemble_sybmolic(state) cache = (V,K) - residual_cells!(r,u_dofs,state) - residual_faces!(r,u_dofs,state) - jacobian_cells!(V,u_dofs,state) - setcoofast!(J,V,K) + @timeit timer "residual_cells!" residual_cells!(r,u_dofs,state) + @timeit timer "residual_faces!" residual_faces!(r,u_dofs,state) + @timeit timer "jacobian_cells!" jacobian_cells!(V,u_dofs,state) + @timeit timer "sparse_matrix!" setcoofast!(J,V,K) r,J,cache end function residual!(r,u_dofs,cache) fill!(r,0) - residual_cells!(r,u_dofs,state) - residual_faces!(r,u_dofs,state) + @timeit timer "residual_cells!" residual_cells!(r,u_dofs,state) + @timeit timer "residual_faces!" residual_faces!(r,u_dofs,state) r end function jacobian!(J,u_dofs,cache) (V,K) = cache - LinearAlgebra.fillstored!(J,0) - jacobian_cells!(V,u_dofs,state) - setcoofast!(J,V,K) + @timeit timer "jacobian_cells!" jacobian_cells!(V,u_dofs,state) + @timeit timer "sparse_matrix!" setcoofast!(J,V,K) J end (;initial,linearize,residual!,jacobian!) diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl index 1d6e0cf6..a9b09be3 100644 --- a/extensions/GalerkinToolkitExamples/test/example003_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example003_tests.jl @@ -1,8 +1,9 @@ module Example003Tests import GalerkinToolkit as gk -using GalerkinToolkitExamples: Example003 +using GalerkinToolkitExamples: Example003, Example001 using Test + using TimerOutputs tol = 1.0e-10 @@ -24,4 +25,22 @@ results = Example003.main(params) @test results[:el2] < tol @test results[:ncells] == 10*5 +params = Dict{Symbol,Any}() +timer = TimerOutput() +@timeit timer "cartesian_mesh" params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(5,5,5)) +linear_solver = Example001.cg_amg_solver(;verbose=true) +params[:solver] = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) +params[:export_vtu] = false +params[:timer] = timer +Example003.main(params) + +params = Dict{Symbol,Any}() +timer = TimerOutput() +@timeit timer "cartesian_mesh" params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(70,70,70)) +linear_solver = Example001.cg_amg_solver(;verbose=true) +params[:solver] = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) +params[:export_vtu] = false +params[:timer] = timer +Example003.main(params) + end # module From 6ccf2e7f936fbd3c78ed175790d2c5b99f0df4b2 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Thu, 8 Feb 2024 18:29:41 +0100 Subject: [PATCH 5/9] More enhancements in example003 --- .../GalerkinToolkitExamples/src/example003.jl | 66 +++++++++++++++++-- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index efee5e0c..623f398c 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -6,7 +6,7 @@ using StaticArrays using LinearAlgebra using SparseArrays using WriteVTK -using PartitionedArrays: JaggedArray, nzindex +using PartitionedArrays: JaggedArray, nzindex, val_parameter using TimerOutputs using NLsolve using Random @@ -244,7 +244,7 @@ function nonlinear_problem(state) @timeit timer "residual_cells!" residual_cells!(r,u_dofs,state) @timeit timer "residual_faces!" residual_faces!(r,u_dofs,state) @timeit timer "jacobian_cells!" jacobian_cells!(V,u_dofs,state) - @timeit timer "sparse_matrix!" setcoofast!(J,V,K) + @timeit timer "sparse_matrix!" sparse_matrix!(J,V,K) r,J,cache end function residual!(r,u_dofs,cache) @@ -256,7 +256,7 @@ function nonlinear_problem(state) function jacobian!(J,u_dofs,cache) (V,K) = cache @timeit timer "jacobian_cells!" jacobian_cells!(V,u_dofs,state) - @timeit timer "sparse_matrix!" setcoofast!(J,V,K) + @timeit timer "sparse_matrix!" sparse_matrix!(J,V,K) J end (;initial,linearize,residual!,jacobian!) @@ -309,23 +309,77 @@ function assemble_sybmolic(state) end end - J = sparse(I_coo,J_coo,V_coo,n_dofs,n_dofs) - K = precompute_nzindex(J,I_coo,J_coo) + J,K = sparse_matrix(I_coo,J_coo,V_coo,n_dofs,n_dofs;reuse=true,skip_out_of_bounds=true) r,J,V_coo,K end +struct FilteredCooVector{F,A,B,C,T} <: AbstractVector{T} + f::F + I::A + J::B + V::C + function FilteredCooVector(f::F,I::A,J::B,V::C) where {F,A,B,C} + T = eltype(C) + new{F,A,B,C,T}(f,I,J,V) + end +end +Base.size(a::FilteredCooVector) = size(a.V) +Base.IndexStyle(::Type{<:FilteredCooVector}) = IndexLinear() +Base.@propagate_inbounds function Base.getindex(a::FilteredCooVector,k::Int) + i = a.I[k] + j = a.J[k] + v = a.V[k] + if i < 1 || j < 1 + return a.f(v) + end + v +end + +function sparse_matrix(I,J,V,m,n;kwargs...) + sparse_matrix(sparse,I,J,V,m,n;kwargs...) +end +function sparse_matrix(f,I,J,V,m,n;reuse=Val(false),skip_out_of_bounds=true) + if !skip_out_of_bounds + I2 = I + J2 = J + V2 = V + elseif m*n == 0 + Ti = eltype(I) + T = eltype(V) + I2 = Ti[] + J2 = Ti[] + V2 = Tv[] + else + I2 = FilteredCooVector(one,I,J,I) + J2 = FilteredCooVector(one,I,J,J) + V2 = FilteredCooVector(zero,I,J,V) + end + A = f(I2,J2,V2,m,n) + if val_parameter(reuse) + K = precompute_nzindex(A,I,J) + return A,K + end + A +end + function precompute_nzindex(A,I,J) K = zeros(Int32,length(I)) for (p,(i,j)) in enumerate(zip(I,J)) + if i < 1 || j < 1 + continue + end K[p] = nzindex(A,i,j) end K end -function setcoofast!(A,V,K) +function sparse_matrix!(A,V,K) LinearAlgebra.fillstored!(A,0) A_nz = nonzeros(A) for (k,v) in zip(K,V) + if k < 1 + continue + end A_nz[k] += v end A From 2c7a0620aca00f5d12b9edde1fd2db1d833a01dd Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Fri, 9 Feb 2024 09:28:43 +0100 Subject: [PATCH 6/9] Allowing negative indices in coo vectors --- .../GalerkinToolkitExamples/src/example003.jl | 26 ++----------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index 623f398c..84348d9a 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -271,17 +271,7 @@ function assemble_sybmolic(state) for cell in 1:ncells dofs = cell_to_dofs[cell] ndofs = length(dofs) - for i in 1:ndofs - if !(dofs[i]>0) - continue - end - for j in 1:ndofs - if !(dofs[j]>0) - continue - end - n_coo += 1 - end - end + n_coo += ndofs*ndofs end I_coo = Vector{Int32}(undef,n_coo) @@ -294,14 +284,8 @@ function assemble_sybmolic(state) dofs = cell_to_dofs[cell] ndofs = length(dofs) for i in 1:ndofs - if !(dofs[i]>0) - continue - end dofs_i = dofs[i] for j in 1:ndofs - if !(dofs[j]>0) - continue - end n_coo += 1 I_coo[n_coo] = dofs_i J_coo[n_coo] = dofs[j] @@ -309,7 +293,7 @@ function assemble_sybmolic(state) end end - J,K = sparse_matrix(I_coo,J_coo,V_coo,n_dofs,n_dofs;reuse=true,skip_out_of_bounds=true) + J,K = sparse_matrix(I_coo,J_coo,V_coo,n_dofs,n_dofs;reuse=true) r,J,V_coo,K end @@ -569,13 +553,7 @@ function jacobian_cells!(V_coo,u_dofs,state) end # Set the result in the output array for i in 1:nl - if !(dofs[i]>0) - continue - end for j in 1:nl - if !(dofs[j]>0) - continue - end i_coo += 1 V_coo[i_coo] = Ae[i,j] end From c71110ec225fb3150ba75e96d5f2caf316f60466 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Fri, 9 Feb 2024 09:38:59 +0100 Subject: [PATCH 7/9] Moving code to PartitionedArrays --- .../GalerkinToolkitExamples/src/example003.jl | 75 +------------------ .../test/example003_tests.jl | 1 + 2 files changed, 2 insertions(+), 74 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index 84348d9a..c7934bf1 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -6,7 +6,7 @@ using StaticArrays using LinearAlgebra using SparseArrays using WriteVTK -using PartitionedArrays: JaggedArray, nzindex, val_parameter +using PartitionedArrays: JaggedArray, sparse_matrix, sparse_matrix! using TimerOutputs using NLsolve using Random @@ -20,7 +20,6 @@ using IterativeSolvers: cg! function main(params_in) - # Process params params_default = default_params() params = add_default_params(params_in,params_default) @@ -297,78 +296,6 @@ function assemble_sybmolic(state) r,J,V_coo,K end -struct FilteredCooVector{F,A,B,C,T} <: AbstractVector{T} - f::F - I::A - J::B - V::C - function FilteredCooVector(f::F,I::A,J::B,V::C) where {F,A,B,C} - T = eltype(C) - new{F,A,B,C,T}(f,I,J,V) - end -end -Base.size(a::FilteredCooVector) = size(a.V) -Base.IndexStyle(::Type{<:FilteredCooVector}) = IndexLinear() -Base.@propagate_inbounds function Base.getindex(a::FilteredCooVector,k::Int) - i = a.I[k] - j = a.J[k] - v = a.V[k] - if i < 1 || j < 1 - return a.f(v) - end - v -end - -function sparse_matrix(I,J,V,m,n;kwargs...) - sparse_matrix(sparse,I,J,V,m,n;kwargs...) -end -function sparse_matrix(f,I,J,V,m,n;reuse=Val(false),skip_out_of_bounds=true) - if !skip_out_of_bounds - I2 = I - J2 = J - V2 = V - elseif m*n == 0 - Ti = eltype(I) - T = eltype(V) - I2 = Ti[] - J2 = Ti[] - V2 = Tv[] - else - I2 = FilteredCooVector(one,I,J,I) - J2 = FilteredCooVector(one,I,J,J) - V2 = FilteredCooVector(zero,I,J,V) - end - A = f(I2,J2,V2,m,n) - if val_parameter(reuse) - K = precompute_nzindex(A,I,J) - return A,K - end - A -end - -function precompute_nzindex(A,I,J) - K = zeros(Int32,length(I)) - for (p,(i,j)) in enumerate(zip(I,J)) - if i < 1 || j < 1 - continue - end - K[p] = nzindex(A,i,j) - end - K -end - -function sparse_matrix!(A,V,K) - LinearAlgebra.fillstored!(A,0) - A_nz = nonzeros(A) - for (k,v) in zip(K,V) - if k < 1 - continue - end - A_nz[k] += v - end - A -end - @inline function p_laplace_residual(∇u,∇dv,dv,f,p) ∇dv⋅((norm(∇u)^(p-2))*∇u) - f*dv end diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl index a9b09be3..e6c5527e 100644 --- a/extensions/GalerkinToolkitExamples/test/example003_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example003_tests.jl @@ -41,6 +41,7 @@ linear_solver = Example001.cg_amg_solver(;verbose=true) params[:solver] = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) params[:export_vtu] = false params[:timer] = timer +params[:p] = 2 Example003.main(params) end # module From d7b56fb7754f76db9868ae871cc8b30304b69763 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Fri, 9 Feb 2024 10:31:16 +0100 Subject: [PATCH 8/9] Implemented autodiff in example003 --- .../GalerkinToolkitExamples/src/example003.jl | 52 ++++++++++++++--- .../test/example003_tests.jl | 57 +++++++++++++++---- 2 files changed, 88 insertions(+), 21 deletions(-) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index c7934bf1..e606fe08 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -68,6 +68,7 @@ function default_params() params[:solver] = nlsolve_solver() params[:example_path] = joinpath(outdir,"example003") params[:export_vtu] = true + params[:autodiff] = :hand params[:timer] = TimerOutput() params end @@ -214,7 +215,19 @@ function setup(params) @timeit timer "user_funs" user_funs = setup_user_funs(params) solver = params[:solver] p = params[:p] - state = (;p,timer,solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs,dofs) + if params[:autodiff] === :hand + flux = flux_hand + dflux = dflux_hand + elseif params[:autodiff] === :flux + flux = flux_hand + dflux = dflux_from_flux + elseif params[:autodiff] === :energy + flux = flux_from_energy + dflux = dflux_from_energy + else + error("not implemented: autodiff == $autodiff") + end + state = (;p,timer,flux,dflux,solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs,dofs) end function add_basic_info(results,params,state) @@ -296,18 +309,38 @@ function assemble_sybmolic(state) r,J,V_coo,K end -@inline function p_laplace_residual(∇u,∇dv,dv,f,p) - ∇dv⋅((norm(∇u)^(p-2))*∇u) - f*dv -end - -@inline function p_laplace_jacobian(∇u,∇du,∇dv,p) +@inline flux_hand(∇u,p) = (norm(∇u)^(p-2))*∇u +@inline function dflux_hand(∇u,∇du,p) pm2 = p-2 pm4 = p-4 - ∇dv⋅((norm(∇u)^pm2)*∇du) + ∇dv⋅(pm2*(norm(∇u)^pm4)*(∇u⋅∇du)*∇u) + ((norm(∇u)^pm2)*∇du) + (pm2*(norm(∇u)^pm4)*(∇u⋅∇du)*∇u) +end +@inline function dflux_from_flux(∇u,∇du,p) + f(x) = flux_hand(x,p) + x = ∇u + dx = ∇du + dfdx = ForwardDiff.jacobian(f,x) + dfdx*dx +end + +@inline energy(∇u,p) = (1/p)*(norm(∇u)^p) +@inline function flux_from_energy(∇u,p) + f(x) = energy(x,p) + x = ∇u + dfdx = ForwardDiff.gradient(f,x) + dfdx +end +@inline function dflux_from_energy(∇u,∇du,p) + f(x) = energy(x,p) + x = ∇u + dx = ∇du + dfdx = ForwardDiff.hessian(f,x) + dfdx*dx end function residual_cells!(r,u_dofs,state) + flux = state.flux cell_to_dofs = state.dofs.cell_to_dofs cell_to_nodes = state.cell_isomap.face_to_nodes cell_to_rid = state.cell_isomap.face_to_rid @@ -385,7 +418,7 @@ function residual_cells!(r,u_dofs,state) for k in 1:nl dv = ste[k,iq] ∇dv = ∇xe[k] - fe[k] += ( p_laplace_residual(∇u,∇dv,dv,fx,p) )*dV + fe[k] += ( ∇dv⋅flux(∇u,p) - fx*dv )*dV end end for i in 1:nl @@ -399,6 +432,7 @@ end function jacobian_cells!(V_coo,u_dofs,state) + dflux = state.dflux cell_to_dofs = state.dofs.cell_to_dofs cell_to_nodes = state.cell_isomap.face_to_nodes cell_to_rid = state.cell_isomap.face_to_rid @@ -474,7 +508,7 @@ function jacobian_cells!(V_coo,u_dofs,state) ∇du = ∇xe[j] for i in 1:nl ∇dv = ∇xe[i] - Ae[i,j] += ( p_laplace_jacobian(∇u,∇du,∇dv,p) )*dV + Ae[i,j] += ( ∇dv⋅dflux(∇u,∇du,p) )*dV end end end diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl index e6c5527e..5e9dfbcc 100644 --- a/extensions/GalerkinToolkitExamples/test/example003_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example003_tests.jl @@ -13,6 +13,21 @@ results = Example003.main(params) @test results[:eh1] < tol @test results[:el2] < tol +params[:autodiff] = :hand +results = Example003.main(params) +@test results[:eh1] < tol +@test results[:el2] < tol + +params[:autodiff] = :flux +results = Example003.main(params) +@test results[:eh1] < tol +@test results[:el2] < tol + +params[:autodiff] = :energy +results = Example003.main(params) +@test results[:eh1] < tol +@test results[:el2] < tol + params = Dict{Symbol,Any}() params[:mesh] = gk.cartesian_mesh((0,3,0,2),(5,10),complexify=false) params[:dirichlet_tags] = ["1-face-1","1-face-3","1-face-4"] @@ -25,23 +40,41 @@ results = Example003.main(params) @test results[:el2] < tol @test results[:ncells] == 10*5 -params = Dict{Symbol,Any}() +mesh = gk.cartesian_mesh((0,3,0,2,0,1),(5,5,5)) timer = TimerOutput() -@timeit timer "cartesian_mesh" params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(5,5,5)) linear_solver = Example001.cg_amg_solver(;verbose=true) -params[:solver] = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) -params[:export_vtu] = false -params[:timer] = timer -Example003.main(params) +solver = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) params = Dict{Symbol,Any}() -timer = TimerOutput() -@timeit timer "cartesian_mesh" params[:mesh] = gk.cartesian_mesh((0,3,0,2,0,1),(70,70,70)) -linear_solver = Example001.cg_amg_solver(;verbose=true) -params[:solver] = Example003.nlsolve_solver(;timer,linear_solver,show_trace=true,method=:newton) -params[:export_vtu] = false params[:timer] = timer -params[:p] = 2 +params[:solver] = solver +params[:mesh] = mesh +params[:export_vtu] = false +params[:autodiff] = :hand +Example003.main(params) + +reset_timer!(timer) +params[:autodiff] = :flux +Example003.main(params) + +reset_timer!(timer) +params[:autodiff] = :energy Example003.main(params) +mesh = gk.cartesian_mesh((0,3,0,2,0,1),(70,70,70)) +reset_timer!(timer) +params[:mesh] = mesh + +params[:autodiff] = :hand +Example003.main(params) + +reset_timer!(timer) +params[:autodiff] = :flux +Example003.main(params) + +reset_timer!(timer) +params[:autodiff] = :energy +Example003.main(params) + + end # module From 992f193653919609b188a060cbc9012fd8080ca8 Mon Sep 17 00:00:00 2001 From: Francesc Verdugo Date: Fri, 9 Feb 2024 10:39:54 +0100 Subject: [PATCH 9/9] Depend on PartitionedArrays 0.4.2 --- .../GalerkinToolkitExamples/src/example003.jl | 75 ++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index e606fe08..b5145137 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -6,7 +6,7 @@ using StaticArrays using LinearAlgebra using SparseArrays using WriteVTK -using PartitionedArrays: JaggedArray, sparse_matrix, sparse_matrix! +using PartitionedArrays: JaggedArray, val_parameter, nzindex using TimerOutputs using NLsolve using Random @@ -696,4 +696,77 @@ function export_results(uh,params,state) nothing end +# TODO move this to partitioned arrays +struct FilteredCooVector{F,A,B,C,T} <: AbstractVector{T} + f::F + I::A + J::B + V::C + function FilteredCooVector(f::F,I::A,J::B,V::C) where {F,A,B,C} + T = eltype(C) + new{F,A,B,C,T}(f,I,J,V) + end +end +Base.size(a::FilteredCooVector) = size(a.V) +Base.IndexStyle(::Type{<:FilteredCooVector}) = IndexLinear() +Base.@propagate_inbounds function Base.getindex(a::FilteredCooVector,k::Int) + i = a.I[k] + j = a.J[k] + v = a.V[k] + if i < 1 || j < 1 + return a.f(v) + end + v +end + +function sparse_matrix(I,J,V,m,n;kwargs...) + sparse_matrix(sparse,I,J,V,m,n;kwargs...) +end +function sparse_matrix(f,I,J,V,m,n;reuse=Val(false),skip_out_of_bounds=true) + if !skip_out_of_bounds + I2 = I + J2 = J + V2 = V + elseif m*n == 0 + Ti = eltype(I) + T = eltype(V) + I2 = Ti[] + J2 = Ti[] + V2 = Tv[] + else + I2 = FilteredCooVector(one,I,J,I) + J2 = FilteredCooVector(one,I,J,J) + V2 = FilteredCooVector(zero,I,J,V) + end + A = f(I2,J2,V2,m,n) + if val_parameter(reuse) + K = precompute_nzindex(A,I,J) + return A,K + end + A +end + +function precompute_nzindex(A,I,J) + K = zeros(Int32,length(I)) + for (p,(i,j)) in enumerate(zip(I,J)) + if i < 1 || j < 1 + continue + end + K[p] = nzindex(A,i,j) + end + K +end + +function sparse_matrix!(A,V,K) + LinearAlgebra.fillstored!(A,0) + A_nz = nonzeros(A) + for (k,v) in zip(K,V) + if k < 1 + continue + end + A_nz[k] += v + end + A +end + end # module