diff --git a/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl b/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl index aa1d3e99..74b568f2 100644 --- a/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl +++ b/extensions/GalerkinToolkitExamples/src/GalerkinToolkitExamples.jl @@ -3,5 +3,6 @@ module GalerkinToolkitExamples include("example001.jl") include("example002.jl") include("example003.jl") +include("example004.jl") end # module diff --git a/extensions/GalerkinToolkitExamples/src/example003.jl b/extensions/GalerkinToolkitExamples/src/example003.jl index b5145137..ddae28ee 100644 --- a/extensions/GalerkinToolkitExamples/src/example003.jl +++ b/extensions/GalerkinToolkitExamples/src/example003.jl @@ -6,7 +6,7 @@ using StaticArrays using LinearAlgebra using SparseArrays using WriteVTK -using PartitionedArrays: JaggedArray, val_parameter, nzindex +using PartitionedArrays: JaggedArray, sparse_matrix, sparse_matrix! using TimerOutputs using NLsolve using Random @@ -65,7 +65,7 @@ function default_params() params[:neumann_tags] = String[] params[:integration_degree] = 2 params[:p] = 2 - params[:solver] = nlsolve_solver() + params[:solver] = nlsolve_solver(;method=:newton) params[:example_path] = joinpath(outdir,"example003") params[:export_vtu] = true params[:autodiff] = :hand @@ -696,77 +696,4 @@ function export_results(uh,params,state) nothing end -# TODO move this to partitioned arrays -struct FilteredCooVector{F,A,B,C,T} <: AbstractVector{T} - f::F - I::A - J::B - V::C - function FilteredCooVector(f::F,I::A,J::B,V::C) where {F,A,B,C} - T = eltype(C) - new{F,A,B,C,T}(f,I,J,V) - end -end -Base.size(a::FilteredCooVector) = size(a.V) -Base.IndexStyle(::Type{<:FilteredCooVector}) = IndexLinear() -Base.@propagate_inbounds function Base.getindex(a::FilteredCooVector,k::Int) - i = a.I[k] - j = a.J[k] - v = a.V[k] - if i < 1 || j < 1 - return a.f(v) - end - v -end - -function sparse_matrix(I,J,V,m,n;kwargs...) - sparse_matrix(sparse,I,J,V,m,n;kwargs...) -end -function sparse_matrix(f,I,J,V,m,n;reuse=Val(false),skip_out_of_bounds=true) - if !skip_out_of_bounds - I2 = I - J2 = J - V2 = V - elseif m*n == 0 - Ti = eltype(I) - T = eltype(V) - I2 = Ti[] - J2 = Ti[] - V2 = Tv[] - else - I2 = FilteredCooVector(one,I,J,I) - J2 = FilteredCooVector(one,I,J,J) - V2 = FilteredCooVector(zero,I,J,V) - end - A = f(I2,J2,V2,m,n) - if val_parameter(reuse) - K = precompute_nzindex(A,I,J) - return A,K - end - A -end - -function precompute_nzindex(A,I,J) - K = zeros(Int32,length(I)) - for (p,(i,j)) in enumerate(zip(I,J)) - if i < 1 || j < 1 - continue - end - K[p] = nzindex(A,i,j) - end - K -end - -function sparse_matrix!(A,V,K) - LinearAlgebra.fillstored!(A,0) - A_nz = nonzeros(A) - for (k,v) in zip(K,V) - if k < 1 - continue - end - A_nz[k] += v - end - A -end - end # module diff --git a/extensions/GalerkinToolkitExamples/src/example004.jl b/extensions/GalerkinToolkitExamples/src/example004.jl new file mode 100644 index 00000000..27841b10 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/src/example004.jl @@ -0,0 +1,771 @@ +module Example004 + +import GalerkinToolkit as gk +import ForwardDiff +using StaticArrays +using LinearAlgebra +using SparseArrays +using WriteVTK +using PartitionedArrays +using TimerOutputs +using NLsolve +using Random +using GalerkinToolkitExamples: Example001 + +using Preconditioners +using IterativeSolvers: cg! + +# This one implements a vanilla sequential iso-parametric p-Laplacian solver by only +# using the mesh interface. + +function main(params_in) + + # Process params + params_default = default_params() + params = add_default_params(params_in,params_default) + results = Dict{Symbol,Any}() + timer = params[:timer] + + # Setup main data structures + @timeit timer "setup" state = setup(params) + add_basic_info(results,params,state) + + @timeit timer "solve_problem" x = solve_problem(params,state) + + # Post process + @timeit timer "setup_uh" uh = setup_uh(x,state) + @timeit timer "integrate_error_norms" integrate_error_norms(results,uh,state) + @timeit timer "export_results" export_results(uh,params,state) + + map_main(state.local_states) do local_state + print_timer(local_state.timer,allocations=false) + end + + results +end + +function add_default_params(params_in,params_default) + UmD = setdiff(keys(params_in),keys(params_default)) + for k in UmD + @warn "Parameter with key :$k is unused" + end + UiD = intersect(keys(params_default),keys(params_in)) + DmU = setdiff(keys(params_default),keys(params_in)) + a = [ k=>params_in[k] for k in UiD] + b = [ k=>params_default[k] for k in DmU] + Dict(vcat(a,b)) +end + +function default_params() + np = 4 + parts = DebugArray(LinearIndices((np,))) + domain = (0,1,0,1) + cells_per_dir = (10,10) + parts_per_dir = (2,2) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + outdir = mkpath(joinpath(@__DIR__,"..","output")) + params = Dict{Symbol,Any}() + params[:mesh] = mesh + params[:u] = (x) -> sum(x) + params[:f] = (x) -> 0.0 + params[:g] = (x) -> 0.0 + params[:dirichlet_tags] = ["boundary"] + params[:neumann_tags] = String[] + params[:integration_degree] = 2 + params[:p] = 2 + params[:solver] = nlsolve_solver(;method=:newton,show_trace=true) + params[:example_path] = joinpath(outdir,"example004") + params[:export_vtu] = true + params[:autodiff] = :hand + params[:timer] = TimerOutput() + params +end + +function nlsolve_solver(;linear_solver=Example001.lu_solver(),timer=TimerOutput(),options...) + function setup(x0,nlp) + @timeit timer "linearize" r0,J0,cache = nlp.linearize(x0) + dx = similar(r0,axes(J0,2)) # TODO is there any way of reusing this in the nonlinear solve? + @timeit timer "linear_solver_setup" ls_setup = linear_solver.setup(dx,J0,r0) + function linsolve(x,A,b) + # TODO we dont need to re-setup for the first + # linear solve. + # This can be avoided with a Ref{Bool} shared + # between this function and j! + @timeit timer "linear_solver_setup!" linear_solver.setup!(ls_setup,A) + @timeit timer "linear_solver_solve!" linear_solver.solve!(x,ls_setup,b) + x + end + f!(r,x) = nlp.residual!(r,x,cache) + j!(J,x) = nlp.jacobian!(J,x,cache) + df = OnceDifferentiable(f!,j!,x0,r0,J0) + (;df,linsolve,J0,cache,ls_setup,linear_solver) + end + function solve!(x,setup) + (;df,linsolve) = setup + result = nlsolve(df,x;linsolve,options...) + x .= result.zero + nothing + end + function setup!(setup,x0,nlp) + error("todo") + end + function finalize!(setup) + setup.linear_solver.finalize!(setup.ls_setup) + end + (;setup,solve!,setup!,finalize!) +end + +function setup_dirichlet_bcs(params) + mesh = params[:mesh] + u = params[:u] + dirichlet_tags = params[:dirichlet_tags] + node_to_tag = zeros(gk.num_nodes(mesh)) + tag_to_name = dirichlet_tags + gk.classify_mesh_nodes!(node_to_tag,mesh,tag_to_name) + free_and_dirichlet_nodes = gk.partition_from_mask(i->i==0,node_to_tag) + node_to_x = gk.node_coordinates(mesh) + dirichlet_nodes = last(free_and_dirichlet_nodes) + x_dirichlet = view(node_to_x,dirichlet_nodes) + u_dirichlet = u.(x_dirichlet) + dirichlet_bcs = (;free_and_dirichlet_nodes,u_dirichlet,node_to_tag) +end + +function setup_integration(params,objects) + mesh = params[:mesh] + degree = params[:integration_degree] + D = gk.num_dims(mesh) + if objects === :cells + d = D + elseif objects === :faces + d = D-1 + else + error("") + end + # Integration + ref_cells = gk.reference_faces(mesh,d) + face_to_rid = gk.face_reference_id(mesh,d) + integration_rules = map(ref_cells) do ref_cell + gk.default_quadrature(gk.geometry(ref_cell),degree) + end + rid_to_weights = map(gk.weights,integration_rules) + rid_to_coords = map(gk.coordinates,integration_rules) + integration = (;rid_to_weights,rid_to_coords,face_to_rid,d) +end + +function setup_isomap(params,integration) + rid_to_coords = integration.rid_to_coords + face_to_rid = integration.face_to_rid + d = integration.d + mesh = params[:mesh] + ref_cells = gk.reference_faces(mesh,d) + shape_funs = map(rid_to_coords,ref_cells) do q,ref_cell + shape_vals = gk.tabulator(ref_cell)(gk.value,q) + shape_grads = gk.tabulator(ref_cell)(ForwardDiff.gradient,q) + shape_vals, shape_grads + end + rid_to_shape_vals = map(first,shape_funs) + rid_to_shape_grads = map(last,shape_funs) + face_to_nodes = JaggedArray(gk.face_nodes(mesh,d)) + node_to_coords = gk.node_coordinates(mesh) + isomap = (;face_to_nodes,node_to_coords,face_to_rid,rid_to_shape_vals,rid_to_shape_grads,d) +end + +function setup_neumann_bcs(params) + mesh = params[:mesh] + neumann_tags = params[:neumann_tags] + neum_face_to_face = Int[] + if length(neumann_tags) != 0 + D = gk.num_dims(mesh) + tag_to_groups = gk.physical_faces(mesh,D-1) + neum_face_to_face = reduce(union,map(tag->tag_to_groups[tag],neumann_tags)) + end + (;neum_face_to_face) +end + +function setup_user_funs(params) + u = params[:u] + f = params[:f] + g = params[:g] + user_funs = (;u,f,g) +end + +function setup(params) + mesh = params[:mesh] + timer = params[:timer] + local_states_0 = map(partition(mesh)) do mesh + local_params = copy(params) + local_params[:mesh] = mesh + local_setup(local_params,timer) + end + @timeit timer "setup_dofs" dofs,local_dofs = setup_dofs(params,local_states_0) + local_states = map((a,dofs)->(;dofs,a...),local_states_0,local_dofs) + state = (;local_states,dofs,timer) + state +end + +function setup_dofs(params,local_states) + mesh = params[:mesh] + node_partition = gk.node_partition(mesh) + global_node_to_mask = pfill(false,node_partition) + function fillmask!(node_to_mask,state) + free_nodes = first(state.dirichlet_bcs.free_and_dirichlet_nodes) + node_to_mask[free_nodes] .= true + end + map(fillmask!,partition(global_node_to_mask),local_states) + global_dof_to_node, global_node_to_dof = find_local_indices(global_node_to_mask) + dof_partition = partition(axes(global_dof_to_node,1)) + function setup_local_dofs(state,node_to_dof) + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + n_free = length(first(free_and_dirichlet_nodes)) + n_dofs = n_free + cell_to_nodes = state.cell_isomap.face_to_nodes + face_to_nodes = state.face_isomap.face_to_nodes + function map_node_to_dof(node) + free_node = node_to_free_node[node] + if free_node > n_free + return Int(-(free_node-n_free)) + end + dof = node_to_dof[node] + Int(dof) + end + cell_to_dofs_data = map_node_to_dof.(cell_to_nodes.data) + face_to_dofs_data = map_node_to_dof.(face_to_nodes.data) + cell_to_dofs = JaggedArray(cell_to_dofs_data,cell_to_nodes.ptrs) + face_to_dofs = JaggedArray(face_to_dofs_data,face_to_nodes.ptrs) + (;cell_to_dofs,face_to_dofs,n_dofs) + end + local_dofs = map(setup_local_dofs,local_states,partition(global_node_to_dof)) + (;dof_partition,global_node_to_dof,global_dof_to_node), local_dofs +end + +function local_setup(params,timer) + @timeit timer "dirichlet_bcs" dirichlet_bcs = setup_dirichlet_bcs(params) + @timeit timer "neumann_bcs" neumann_bcs = setup_neumann_bcs(params) + @timeit timer "cell_integration" cell_integration = setup_integration(params,:cells) + @timeit timer "face_integration" face_integration = setup_integration(params,:faces) + @timeit timer "cell_isomap" cell_isomap = setup_isomap(params,cell_integration) + @timeit timer "face_isomap" face_isomap = setup_isomap(params,face_integration) + @timeit timer "user_funs" user_funs = setup_user_funs(params) + solver = params[:solver] + p = params[:p] + if params[:autodiff] === :hand + flux = flux_hand + dflux = dflux_hand + elseif params[:autodiff] === :flux + flux = flux_hand + dflux = dflux_from_flux + elseif params[:autodiff] === :energy + flux = flux_from_energy + dflux = dflux_from_energy + else + error("not implemented: autodiff == $autodiff") + end + state = (;p,timer,flux,dflux,solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs) +end + +function add_basic_info(results,params,state) + mesh = params[:mesh] + nfree = length(state.dofs.global_dof_to_node) + nnodes = gk.num_nodes(mesh) + ncells = gk.num_faces(mesh,gk.num_dims(mesh)) + results[:nfree] = nfree + results[:nnodes] = nnodes + results[:ncells] = ncells + results +end + +# In the final interface, a non-linear problem +# will have function size and axes +function nonlinear_problem(state) + timer = state.timer + function initial() + dof_partition = partition(axes(state.dofs.global_dof_to_node,1)) + Random.seed!(1) + prand(Float64,dof_partition) + end + function linearize(u) + t = consistent!(u) + I,J,V = map(assemble_symbolic_local,state.local_states) |> tuple_of_arrays + r = similar(u) + fill!(r,0) + wait(t) + map(residual_local!,partition(r),partition(u),state.local_states) + t = assemble!(r) + map(jacobian_local!,V,partition(u),state.local_states) + wait(t) + dof_partition = partition(axes(u,1)) + indices = :local + subassembled = true + reuse = true + row_partition = map(remove_ghost,dof_partition) + assembled_rows = row_partition + @timeit timer "psparse" J,J_cache = psparse(I,J,V,dof_partition,dof_partition;subassembled,indices,reuse,assembled_rows) |> fetch + cache = (V,J_cache) + r,J,cache + end + function residual!(r,u,cache) + t = consistent!(u) + fill!(r,0) + wait(t) + map(residual_local!,partition(r),partition(u),state.local_states) + assemble!(r) |> wait + r + end + function jacobian!(J,u,cache) + (V,J_cache) = cache + consistent!(u) |> wait + map(jacobian_local!,V,partition(u),state.local_states) + @timeit timer "psparse!" psparse!(J,V,J_cache) |> wait + J + end + (;initial,linearize,residual!,jacobian!) +end + +function assemble_symbolic_local(state) + timer = state.timer + @timeit timer "assemble_symbolic" assemble_symbolic(state) +end + +function assemble_symbolic(state) + cell_to_dofs = state.dofs.cell_to_dofs + n_dofs = state.dofs.n_dofs + n_coo = 0 + ncells = length(cell_to_dofs) + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + n_coo += ndofs*ndofs + end + I_coo = Vector{Int32}(undef,n_coo) + J_coo = Vector{Int32}(undef,n_coo) + V_coo = Vector{Float64}(undef,n_coo) + n_coo = 0 + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + for i in 1:ndofs + dofs_i = dofs[i] + for j in 1:ndofs + n_coo += 1 + I_coo[n_coo] = dofs_i + J_coo[n_coo] = dofs[j] + end + end + end + I_coo,J_coo,V_coo +end + +function residual_local!(r,u_dofs,state) + timer = state.timer + @timeit timer "residual_cells!" residual_cells!(r,u_dofs,state) + @timeit timer "residual_faces!" residual_faces!(r,u_dofs,state) +end + +function jacobian_local!(V,u_dofs,state) + timer = state.timer + @timeit timer "jacobian_cells!" jacobian_cells!(V,u_dofs,state) +end + +@inline flux_hand(∇u,p) = (norm(∇u)^(p-2))*∇u + +@inline function dflux_hand(∇u,∇du,p) + pm2 = p-2 + pm4 = p-4 + ((norm(∇u)^pm2)*∇du) + (pm2*(norm(∇u)^pm4)*(∇u⋅∇du)*∇u) +end + +@inline function dflux_from_flux(∇u,∇du,p) + f(x) = flux_hand(x,p) + x = ∇u + dx = ∇du + dfdx = ForwardDiff.jacobian(f,x) + dfdx*dx +end + +@inline energy(∇u,p) = (1/p)*(norm(∇u)^p) + +@inline function flux_from_energy(∇u,p) + f(x) = energy(x,p) + x = ∇u + dfdx = ForwardDiff.gradient(f,x) + dfdx +end + +@inline function dflux_from_energy(∇u,∇du,p) + f(x) = energy(x,p) + x = ∇u + dx = ∇du + dfdx = ForwardDiff.hessian(f,x) + dfdx*dx +end + +function residual_cells!(r,u_dofs,state) + + flux = state.flux + cell_to_dofs = state.dofs.cell_to_dofs + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + f = state.user_funs.f + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + u_dirichlet = state.dirichlet_bcs.u_dirichlet + ncells = length(cell_to_nodes) + p = state.p + + # Allocate auxiliary buffers + ∇x = map(i->similar(i,size(i,2)),∇s) + Aes = map(i->zeros(size(i,2),size(i,2)),∇s) + fes = map(i->zeros(size(i,2)),∇s) + ues = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) + + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + T = eltype(Tx) + + i_coo = 0 + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + dofs = cell_to_dofs[cell] + Ae = Aes[rid] + fe = fes[rid] + ue = ues[rid] + nl = length(nodes) + ∇ste = ∇st[rid] + ∇xe = ∇x[rid] + ste = st[rid] + we = w[rid] + nq = length(we) + fill!(Ae,zero(eltype(Ae))) + fill!(fe,zero(eltype(Ae))) + # Integrate + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + dof_k = dofs[k] + if dof_k < 1 + uk = u_dirichlet[-dof_k] + ue[k] = uk + continue + end + ue[k] = u_dofs[dof_k] + end + ∇u = zero(Tx) + for k in 1:nl + uek = ue[k] + ∇xek = invJt*∇ste[k,iq] + ∇u += ∇xek*uek + ∇xe[k] = ∇xek + end + fx = f(xint) + for k in 1:nl + dv = ste[k,iq] + ∇dv = ∇xe[k] + fe[k] += ( ∇dv⋅flux(∇u,p) - fx*dv )*dV + end + end + for i in 1:nl + if !(dofs[i]>0) + continue + end + r[dofs[i]] += fe[i] + end + end +end + +function jacobian_cells!(V_coo,u_dofs,state) + + dflux = state.dflux + cell_to_dofs = state.dofs.cell_to_dofs + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + f = state.user_funs.f + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + u_dirichlet = state.dirichlet_bcs.u_dirichlet + ncells = length(cell_to_nodes) + p = state.p + + # Allocate auxiliary buffers + ∇x = map(i->similar(i,size(i,2)),∇s) + Aes = map(i->zeros(size(i,2),size(i,2)),∇s) + fes = map(i->zeros(size(i,2)),∇s) + ues = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) + + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + T = eltype(Tx) + + i_coo = 0 + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + dofs = cell_to_dofs[cell] + Ae = Aes[rid] + fe = fes[rid] + ue = ues[rid] + nl = length(nodes) + ∇ste = ∇st[rid] + ∇xe = ∇x[rid] + ste = st[rid] + we = w[rid] + nq = length(we) + fill!(Ae,zero(eltype(Ae))) + fill!(fe,zero(eltype(Ae))) + # Integrate + for iq in 1:nq + Jt = zero(TJ) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] + Jt += ∇sqx*x' + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + dof_k = dofs[k] + if dof_k < 1 + uk = u_dirichlet[-dof_k] + ue[k] = uk + continue + end + ue[k] = u_dofs[dof_k] + end + ∇u = zero(Tx) + for k in 1:nl + uek = ue[k] + ∇xek = invJt*∇ste[k,iq] + ∇u += ∇xek*uek + ∇xe[k] = ∇xek + end + for j in 1:nl + ∇du = ∇xe[j] + for i in 1:nl + ∇dv = ∇xe[i] + Ae[i,j] += ( ∇dv⋅dflux(∇u,∇du,p) )*dV + end + end + end + # Set the result in the output array + for i in 1:nl + for j in 1:nl + i_coo += 1 + V_coo[i_coo] = Ae[i,j] + end + end + end +end + +function residual_faces!(r,u_dofs,state) + + neum_face_to_face = state.neumann_bcs.neum_face_to_face + if length(neum_face_to_face) == 0 + return nothing + end + + face_to_rid = state.face_isomap.face_to_rid + face_to_dofs = state.dofs.face_to_dofs + face_to_nodes = state.face_isomap.face_to_nodes + ∇s_f = state.face_isomap.rid_to_shape_grads + s_f = state.face_isomap.rid_to_shape_vals + node_to_x = state.face_isomap.node_to_coords + g = state.user_funs.g + w_f = state.face_integration.rid_to_weights + d = state.cell_integration.d + + fes = map(i->zeros(size(i,2)),s_f) + Tx = SVector{d,Float64} + Ts = typeof(zero(SVector{d-1,Float64})*zero(Tx)') + + for face in neum_face_to_face + rid = face_to_rid[face] + nodes = face_to_nodes[face] + dofs = face_to_dofs[face] + ∇se = ∇s_f[rid] + se = s_f[rid] + we = w_f[rid] + nq = length(we) + fe = fes[rid] + fill!(fe,zero(eltype(fe))) + nl = length(nodes) + for iq in 1:nq + Jt = zero(Ts) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + J = transpose(Jt) + dS = sqrt(det(Jt*J))*we[iq] + gx = g(xint) + for k in 1:nl + fe[k] += gx*se[iq,k]*dS + end + end + for i in 1:nl + if !(dofs[i]>0) + continue + end + r[dofs[i]] -= fe[i] + end + end +end + +function solve_problem(params,state) + timer = state.timer + problem = nonlinear_problem(state) + solver = params[:solver] + x = problem.initial() + @timeit timer "solver.setup" setup = solver.setup(x,problem) + @timeit timer "solver.solve!" solver.solve!(x,setup) + @timeit timer "solver.finalize!" solver.finalize!(setup) + x +end + +function setup_uh(u,state) + consistent!(u) |> wait + function setup_local_uh(dof_to_x,state,node_to_dof) + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + free_node_to_node = first(free_and_dirichlet_nodes) + @views u_free = dof_to_x[node_to_dof[free_node_to_node]] + u_dirichlet = state.dirichlet_bcs.u_dirichlet + nnodes = length(node_to_x) + local_uh = zeros(nnodes) + local_uh[first(free_and_dirichlet_nodes)] = u_free + local_uh[last(free_and_dirichlet_nodes)] = u_dirichlet + local_uh + end + uh = map(setup_local_uh,partition(u),state.local_states,partition(state.dofs.global_node_to_dof)) + uh +end + +function integrate_error_norms(results,uh,state) + local_states = state.local_states + eh1², el2² = map(integrate_error_norms_loop,uh,local_states) |> tuple_of_arrays + eh1 = sqrt(sum(eh1²)) + el2 = sqrt(sum(el2²)) + results[:eh1] = eh1 + results[:el2] = el2 + results +end + +function integrate_error_norms_loop(uh,state) + + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + u = state.user_funs.u + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + + eh1 = 0.0 + el2 = 0.0 + ncells = length(cell_to_rid) + ues = map(i->zeros(size(i,2)),∇s) + ∇x = map(i->similar(i,size(i,2)),∇s) + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + ue = ues[rid] + nl = length(nodes) + ∇se = ∇s[rid] + ∇xe = ∇x[rid] + se = s[rid] + we = w[rid] + nq = length(we) + for k in 1:nl + nk = nodes[k] + ue[k] = uh[nk] + end + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + ∇xe[k] = invJt*∇se[iq,k] + end + ux = u(xint) + ∇ux = ForwardDiff.gradient(u,xint) + ∇uhx = zero(∇ux) + uhx = zero(ux) + for k in 1:nl + uek = ue[k] + ∇uhx += uek*∇xe[k] + uhx += uek*se[iq,k] + end + ∇ex = ∇ux - ∇uhx + ex = ux - uhx + eh1 += (∇ex⋅∇ex + ex*ex)*dV + el2 += ex*ex*dV + end + end + eh1, el2 +end + +function export_results(uh,params,state) + if ! params[:export_vtu] + return nothing + end + example_path = params[:example_path] + mesh = params[:mesh] + ranks = linear_indices(gk.node_partition(mesh)) + np = length(ranks) + map(partition(mesh),ranks,uh,gk.index_partition(mesh)) do mesh,rank,uh,ids + pvtk_grid(example_path,gk.vtk_args(mesh)...;part=rank,nparts=np) do vtk + gk.vtk_physical_faces!(vtk,mesh) + vtk["piece",VTKCellData()] = fill(rank,sum(gk.num_faces(mesh))) + vtk["owner",VTKPointData()] = local_to_owner(gk.node_indices(ids)) + vtk["uh",VTKPointData()] = uh + end + end +end + +end # module diff --git a/extensions/GalerkinToolkitExamples/test/example003_tests.jl b/extensions/GalerkinToolkitExamples/test/example003_tests.jl index 5e9dfbcc..55b454f6 100644 --- a/extensions/GalerkinToolkitExamples/test/example003_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example003_tests.jl @@ -8,7 +8,7 @@ using Test tol = 1.0e-10 params = Dict{Symbol,Any}() -params[:mesh] = gk.cartesian_mesh((0,1,0,1),(3,3)) +params[:mesh] = gk.cartesian_mesh((0,10,0,10),(2,2)) results = Example003.main(params) @test results[:eh1] < tol @test results[:el2] < tol diff --git a/extensions/GalerkinToolkitExamples/test/example004_defs.jl b/extensions/GalerkinToolkitExamples/test/example004_defs.jl new file mode 100644 index 00000000..4781edb3 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/example004_defs.jl @@ -0,0 +1,59 @@ + +import GalerkinToolkit as gk +using GalerkinToolkitExamples: Example004, Example002 +using Test +using PartitionedArrays +using PetscCall +using TimerOutputs + +function example004_tests_np_4(distribute) + tol = 1.0e-8 + params = Dict{Symbol,Any}() + domain = (0,10,0,10) + cells_per_dir = (20,20) + parts_per_dir = (2,2) + np = prod(parts_per_dir) + parts = distribute(LinearIndices((np,))) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + params[:mesh] = mesh + results = Example004.main(params) + @test results[:eh1] < tol + @test results[:el2] < tol + + params = Dict{Symbol,Any}() + domain = (0,10,0,10,0,10) + cells_per_dir = (10,10,10) + parts_per_dir = (2,2,1) + np = prod(parts_per_dir) + parts = distribute(LinearIndices((np,))) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + params[:mesh] = mesh + results = Example004.main(params) + @test results[:eh1] < tol + @test results[:el2] < tol + + + params = Dict{Symbol,Any}() + domain = (0,10,0,10,0,10) + cells_per_dir = (50,50,50) + parts_per_dir = (2,2,1) + np = prod(parts_per_dir) + parts = distribute(LinearIndices((np,))) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + params[:mesh] = mesh + params[:export_vtu] = false + options = "-pc_type gamg -ksp_type cg -ksp_error_if_not_converged true -ksp_converged_reason -ksp_rtol 1.0e-6" + PetscCall.init(args=split(options)) + # TODO a more comfortable way of passing a timer? + linear_solver = Example002.ksp_solver() + timer = TimerOutput() + params[:solver] = Example004.nlsolve_solver(;timer,linear_solver,method=:newton,show_trace=true) + params[:timer] = timer + results = Example004.main(params) + @test results[:eh1] < tol + @test results[:el2] < tol + +end diff --git a/extensions/GalerkinToolkitExamples/test/example004_tests.jl b/extensions/GalerkinToolkitExamples/test/example004_tests.jl new file mode 100644 index 00000000..8c2283a0 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/example004_tests.jl @@ -0,0 +1,6 @@ +module Example004Tests + +include("example004_defs.jl") +with_debug(example004_tests_np_4) + +end # module diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/example004_driver_np_4.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/example004_driver_np_4.jl new file mode 100644 index 00000000..7f2f27cc --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/example004_driver_np_4.jl @@ -0,0 +1,8 @@ +module TMP + +using PartitionedArrays + +include(joinpath("..","example004_defs.jl")) +with_mpi(example004_tests_np_4) + +end # module diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/example004_tests.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/example004_tests.jl new file mode 100644 index 00000000..406de231 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/example004_tests.jl @@ -0,0 +1,11 @@ +module Example004Tests + +using MPI + +include("run_mpi_driver.jl") + +file = joinpath(@__DIR__,"example004_driver_np_4.jl") +run_mpi_driver(file;procs=4) + +end # module +