diff --git a/extensions/GalerkinToolkitExamples/Project.toml b/extensions/GalerkinToolkitExamples/Project.toml index 94faabe8..d295d8ba 100644 --- a/extensions/GalerkinToolkitExamples/Project.toml +++ b/extensions/GalerkinToolkitExamples/Project.toml @@ -8,7 +8,9 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GalerkinToolkit = "5e3ba9c4-dd81-444d-b69a-0e7bd7bf60a4" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" PartitionedArrays = "5a9dfac6-5c52-46f7-8278-5e2210713be9" +PetscCall = "1194c000-87c4-4102-b4a0-a6217ec4849e" Preconditioners = "af69fa37-3177-5a40-98ee-561f696e4fcd" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/extensions/GalerkinToolkitExamples/src/example001.jl b/extensions/GalerkinToolkitExamples/src/example001.jl index 2102326c..f58cb7dc 100644 --- a/extensions/GalerkinToolkitExamples/src/example001.jl +++ b/extensions/GalerkinToolkitExamples/src/example001.jl @@ -222,14 +222,14 @@ end function assemble_system(state) timer = state.timer - @timeit timer "assemble_sytem_coo" args = assemble_sytem_coo(state) + @timeit timer "assemble_system_coo" args = assemble_system_coo(state) I,J,V,b = args n_dofs = state.dofs.n_dofs @timeit timer "sparse" A = sparse(I,J,V,n_dofs,n_dofs) A,b end -function assemble_sytem_coo(state) +function assemble_system_coo(state) timer = state.timer @timeit timer "assemble_sybmolic" args = assemble_sybmolic(state) @timeit timer "assemble_numeric_cells!" assemble_numeric_cells!(args...,state) @@ -548,211 +548,4 @@ function export_results(uh,params,state) nothing end -### From here, it is old stuff to to be deleted - -function assemble_system_loop(state) - - cell_to_nodes = state.cell_isomap.face_to_nodes - cell_to_rid = state.cell_isomap.face_to_rid - free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes - node_to_x = state.cell_isomap.node_to_coords - ∇s = state.cell_isomap.rid_to_shape_grads - s = state.cell_isomap.rid_to_shape_vals - u_dirichlet = state.dirichlet_bcs.u_dirichlet - f = state.user_funs.f - w = state.cell_integration.rid_to_weights - d = state.cell_integration.d - - # Count coo entries - n_coo = 0 - ncells = length(cell_to_nodes) - node_to_free_node = gk.permutation(free_and_dirichlet_nodes) - nfree = length(first(free_and_dirichlet_nodes)) - for cell in 1:ncells - nodes = cell_to_nodes[cell] - for nj in nodes - if node_to_free_node[nj] > nfree - continue - end - for ni in nodes - if node_to_free_node[ni] > nfree - continue - end - n_coo += 1 - end - end - end - - # Allocate coo values - I_coo = zeros(Int32,n_coo) - J_coo = zeros(Int32,n_coo) - V_coo = zeros(Float64,n_coo) - b = zeros(nfree) - - # Allocate auxiliary buffers - ∇x = map(i->similar(i,size(i,2)),∇s) - Aes = map(i->zeros(size(i,2),size(i,2)),∇s) - fes = map(i->zeros(size(i,2)),∇s) - ues = map(i->zeros(size(i,2)),∇s) - - Tx = SVector{d,Float64} - TJ = typeof(zero(Tx)*zero(Tx)') - - # Fill coo values - i_coo = 0 - for cell in 1:ncells - rid = cell_to_rid[cell] - nodes = cell_to_nodes[cell] - Ae = Aes[rid] - ue = ues[rid] - fe = fes[rid] - nl = length(nodes) - ∇se = ∇s[rid] - ∇xe = ∇x[rid] - se = s[rid] - we = w[rid] - nq = length(we) - fill!(Ae,zero(eltype(Ae))) - fill!(fe,zero(eltype(Ae))) - fill!(ue,zero(eltype(Ae))) - for k in 1:nl - nk = nodes[k] - gk = node_to_free_node[nk] - if gk <= nfree - continue - end - ue[k] = u_dirichlet[gk-nfree] - end - for iq in 1:nq - Jt = zero(TJ) - xint = zero(Tx) - for k in 1:nl - x = node_to_x[nodes[k]] - ∇sqx = ∇se[iq,k] - sqx = se[iq,k] - Jt += ∇sqx*x' - xint += sqx*x - end - detJt = det(Jt) - invJt = inv(Jt) - dV = abs(detJt)*we[iq] - for k in 1:nl - ∇xe[k] = invJt*∇se[iq,k] - end - for j in 1:nl - for i in 1:nl - Ae[i,j] += ∇xe[i]⋅∇xe[j]*dV - end - end - fx = f(xint) - for k in 1:nl - fe[k] += fx*se[iq,k]*dV - end - end - for i in 1:nl - for j in 1:nl - fe[i] -= Ae[i,j]*ue[j] - end - end - for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree - continue - end - b[gi] += fe[i] - end - for j in 1:nl - nj = nodes[j] - gj = node_to_free_node[nj] - if gj > nfree - continue - end - for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree - continue - end - i_coo += 1 - I_coo[i_coo] = gi - J_coo[i_coo] = gj - V_coo[i_coo] = Ae[i,j] - end - end - end - add_neumann_bcs!(b,state) - I_coo,J_coo,V_coo,b -end - -function add_neumann_bcs!(b,state) - - neum_face_to_face = state.neumann_bcs.neum_face_to_face - if length(neum_face_to_face) == 0 - return nothing - end - face_to_rid = state.face_isomap.face_to_rid - face_to_nodes = state.face_isomap.face_to_nodes - ∇s_f = state.face_isomap.rid_to_shape_grads - s_f = state.face_isomap.rid_to_shape_vals - node_to_x = state.face_isomap.node_to_coords - free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes - g = state.user_funs.g - w_f = state.face_integration.rid_to_weights - d = state.cell_integration.d - - fes = map(i->zeros(size(i,2)),s_f) - Tx = SVector{d,Float64} - Ts = typeof(zero(SVector{d-1,Float64})*zero(Tx)') - node_to_free_node = gk.permutation(free_and_dirichlet_nodes) - nfree = length(first(free_and_dirichlet_nodes)) - - for face in neum_face_to_face - rid = face_to_rid[face] - nodes = face_to_nodes[face] - ∇se = ∇s_f[rid] - se = s_f[rid] - we = w_f[rid] - nq = length(we) - fe = fes[rid] - fill!(fe,zero(eltype(fe))) - nl = length(nodes) - for iq in 1:nq - Jt = zero(Ts) - xint = zero(Tx) - for k in 1:nl - x = node_to_x[nodes[k]] - ∇sqx = ∇se[iq,k] - sqx = se[iq,k] - Jt += ∇sqx*x' - xint += sqx*x - end - J = transpose(Jt) - dS = sqrt(det(Jt*J))*we[iq] - gx = g(xint) - for k in 1:nl - fe[k] += gx*se[iq,k]*dS - end - end - for i in 1:nl - ni = nodes[i] - gi = node_to_free_node[ni] - if gi > nfree - continue - end - b[gi] += fe[i] - end - end - - nothing -end - -function assemble_system_old(state) - I,J,V,b = assemble_system_loop(state) - nfree = length(b) - A = sparse(I,J,V,nfree,nfree) - A,b -end - - end # module diff --git a/extensions/GalerkinToolkitExamples/src/example002.jl b/extensions/GalerkinToolkitExamples/src/example002.jl index 9b816805..02b31e53 100644 --- a/extensions/GalerkinToolkitExamples/src/example002.jl +++ b/extensions/GalerkinToolkitExamples/src/example002.jl @@ -1,7 +1,6 @@ module Example002 import GalerkinToolkit as gk -import GalerkinToolkitExamples: Example001 import ForwardDiff using StaticArrays using LinearAlgebra @@ -9,30 +8,36 @@ using SparseArrays using WriteVTK using PartitionedArrays using TimerOutputs +using PetscCall function main(params_in) - timer = TimerOutput() - # Dict to collect results results = Dict{Symbol,Any}() # Process params params_default = default_params() params = add_default_params(params_in,params_default) + parts = linear_indices(partition(params[:mesh])) + + timer = TimerOutput() # Setup main data structures - state = setup(params,timer) + @timeit timer "setup" state = setup(params,timer) add_basic_info(results,params,state) # Assemble system and solve it - A,b = assemble_system(state) - x = solve_system(A,b,params) + @timeit timer "assemble_system" A,b = assemble_system(state) + @timeit timer "solve_system" x = solve_system(A,b,params,state) # Post process - uh = setup_uh(x,state) - integrate_error_norms(results,uh,state) - export_results(uh,params,state) + @timeit timer "setup_uh" uh = setup_uh(x,state) + @timeit timer "integrate_error_norms" integrate_error_norms(results,uh,state) + @timeit timer "export_results" export_results(uh,params,state) + + map_main(state.local_states) do local_state + display(local_state.timer) + end results end @@ -58,20 +63,65 @@ function default_params() ghost_layers = 0 mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) outdir = mkpath(joinpath(@__DIR__,"..","output")) - params = Example001.default_params() + params = Dict{Symbol,Any}() + params[:u] = (x) -> sum(x) + params[:f] = (x) -> 0.0 + params[:g] = (x) -> 0.0 + params[:dirichlet_tags] = ["boundary"] + params[:neumann_tags] = String[] + params[:integration_degree] = 2 + params[:solver] = lu_solver() + params[:export_vtu] = true params[:mesh] = mesh params[:example_path] = joinpath(outdir,"example002") params end +# Not a parallel solver +# just for debugging purposes +function lu_solver() + setup(x,A,b) = lu(A) + setup! = lu! + solve! = ldiv! + finalize!(S) = nothing + (;setup,setup!,solve!,finalize!) +end + +function ksp_solver() + setup = PetscCall.ksp_setup + setup! = PetscCall.ksp_setup! + solve! = PetscCall.ksp_solve! + finalize! = PetscCall.ksp_finalize! + (;setup,setup!,solve!,finalize!) +end + function setup(params,timer) mesh = params[:mesh] - local_states = map(partition(mesh)) do mesh + local_states_0 = map(partition(mesh)) do mesh local_params = copy(params) local_params[:mesh] = mesh - Example001.setup(local_params,timer) + local_setup(local_params,timer) end + @timeit timer "setup_dofs" dofs,local_dofs = setup_dofs(params,local_states_0) + local_states = map((a,dofs)->(;dofs,a...),local_states_0,local_dofs) + state = (;local_states,dofs,timer) + state +end +function local_setup(params,timer) + @timeit timer "dirichlet_bcs" dirichlet_bcs = setup_dirichlet_bcs(params) + @timeit timer "neumann_bcs" neumann_bcs = setup_neumann_bcs(params) + @timeit timer "cell_integration" cell_integration = setup_integration(params,:cells) + @timeit timer "face_integration" face_integration = setup_integration(params,:faces) + @timeit timer "cell_isomap" cell_isomap = setup_isomap(params,cell_integration) + @timeit timer "face_isomap" face_isomap = setup_isomap(params,face_integration) + @timeit timer "user_funs" user_funs = setup_user_funs(params) + solver = params[:solver] + state = (;timer,solver,dirichlet_bcs,neumann_bcs,cell_integration,cell_isomap,face_integration,face_isomap,user_funs) +end + +function setup_dofs(params,local_states) + mesh = params[:mesh] node_partition = gk.node_partition(mesh) global_node_to_mask = pfill(false,node_partition) function fillmask!(node_to_mask,state) @@ -81,13 +131,108 @@ function setup(params,timer) map(fillmask!,partition(global_node_to_mask),local_states) global_dof_to_node, global_node_to_dof = find_local_indices(global_node_to_mask) dof_partition = partition(axes(global_dof_to_node,1)) - state = (;local_states,global_dof_to_node, global_node_to_dof) - state + function setup_local_dofs(state,node_to_dof) + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_free_node = gk.permutation(free_and_dirichlet_nodes) + n_free = length(first(free_and_dirichlet_nodes)) + n_dofs = n_free + cell_to_nodes = state.cell_isomap.face_to_nodes + face_to_nodes = state.face_isomap.face_to_nodes + function map_node_to_dof(node) + free_node = node_to_free_node[node] + if free_node > n_free + return Int(-(free_node-n_free)) + end + dof = node_to_dof[node] + Int(dof) + end + cell_to_dofs_data = map_node_to_dof.(cell_to_nodes.data) + face_to_dofs_data = map_node_to_dof.(face_to_nodes.data) + cell_to_dofs = JaggedArray(cell_to_dofs_data,cell_to_nodes.ptrs) + face_to_dofs = JaggedArray(face_to_dofs_data,face_to_nodes.ptrs) + (;cell_to_dofs,face_to_dofs,n_dofs) + end + local_dofs = map(setup_local_dofs,local_states,partition(global_node_to_dof)) + (;dof_partition,global_node_to_dof,global_dof_to_node), local_dofs +end + +function setup_dirichlet_bcs(params) + mesh = params[:mesh] + u = params[:u] + dirichlet_tags = params[:dirichlet_tags] + node_to_tag = zeros(gk.num_nodes(mesh)) + tag_to_name = dirichlet_tags + gk.classify_mesh_nodes!(node_to_tag,mesh,tag_to_name) + free_and_dirichlet_nodes = gk.partition_from_mask(i->i==0,node_to_tag) + node_to_x = gk.node_coordinates(mesh) + dirichlet_nodes = last(free_and_dirichlet_nodes) + x_dirichlet = view(node_to_x,dirichlet_nodes) + u_dirichlet = u.(x_dirichlet) + dirichlet_bcs = (;free_and_dirichlet_nodes,u_dirichlet,node_to_tag) +end + +function setup_integration(params,objects) + mesh = params[:mesh] + degree = params[:integration_degree] + D = gk.num_dims(mesh) + if objects === :cells + d = D + elseif objects === :faces + d = D-1 + else + error("") + end + # Integration + ref_cells = gk.reference_faces(mesh,d) + face_to_rid = gk.face_reference_id(mesh,d) + integration_rules = map(ref_cells) do ref_cell + gk.default_quadrature(gk.geometry(ref_cell),degree) + end + rid_to_weights = map(gk.weights,integration_rules) + rid_to_coords = map(gk.coordinates,integration_rules) + integration = (;rid_to_weights,rid_to_coords,face_to_rid,d) +end + +function setup_isomap(params,integration) + rid_to_coords = integration.rid_to_coords + face_to_rid = integration.face_to_rid + d = integration.d + mesh = params[:mesh] + ref_cells = gk.reference_faces(mesh,d) + shape_funs = map(rid_to_coords,ref_cells) do q,ref_cell + shape_vals = gk.tabulator(ref_cell)(gk.value,q) + shape_grads = gk.tabulator(ref_cell)(ForwardDiff.gradient,q) + shape_vals, shape_grads + end + rid_to_shape_vals = map(first,shape_funs) + rid_to_shape_grads = map(last,shape_funs) + face_to_nodes = JaggedArray(gk.face_nodes(mesh,d)) + node_to_coords = gk.node_coordinates(mesh) + isomap = (;face_to_nodes,node_to_coords,face_to_rid,rid_to_shape_vals,rid_to_shape_grads,d) +end + +function setup_neumann_bcs(params) + mesh = params[:mesh] + neumann_tags = params[:neumann_tags] + neum_face_to_face = Int[] + if length(neumann_tags) != 0 + D = gk.num_dims(mesh) + tag_to_groups = gk.physical_faces(mesh,D-1) + neum_face_to_face = reduce(union,map(tag->tag_to_groups[tag],neumann_tags)) + end + (;neum_face_to_face) +end + +function setup_user_funs(params) + u = params[:u] + f = params[:f] + g = params[:g] + user_funs = (;u,f,g) end function add_basic_info(results,params,state) mesh = params[:mesh] - nfree = length(state.global_dof_to_node) + nfree = length(state.dofs.global_dof_to_node) nnodes = gk.num_nodes(mesh) ncells = gk.num_faces(mesh,gk.num_dims(mesh)) results[:nfree] = nfree @@ -97,70 +242,255 @@ function add_basic_info(results,params,state) end function assemble_system(state) - dof_partition = partition(axes(state.global_dof_to_node,1)) - b = pzeros(Float64,dof_partition) - I,J,V,free_node_to_b = map(Example001.assemble_system_loop,state.local_states) |> tuple_of_arrays - function map_dofs(state,node_to_dof,dofs,I,J,free_node_to_b,dof_to_b) - free_node_to_node = first(state.dirichlet_bcs.free_and_dirichlet_nodes) - dof_to_global_dof = local_to_global(dofs) - function free_node_to_global_dof(free_node) - # TODO this free_node -> dof should not be needed of the order free dofs conveniently - node = free_node_to_node[free_node] - dof = node_to_dof[node] - global_dof = dof_to_global_dof[dof] - global_dof - end - I .= free_node_to_global_dof.(I) - J .= free_node_to_global_dof.(J) - free_node_to_dof = view(node_to_dof,free_node_to_node) - # TODO this one should not be needed of the order free dofs conveniently - dof_to_b[free_node_to_dof] = free_node_to_b - nothing - end - map(map_dofs,state.local_states,partition(state.global_node_to_dof),dof_partition,I,J,free_node_to_b,partition(b)) - A = psparse(I,J,V,dof_partition,dof_partition;assemble=false) |> fetch - # TODO - A = assemble(A) |> fetch - b = assemble(b,partition(axes(A,1))) |> fetch + timer = state.timer + dof_partition = partition(axes(state.dofs.global_dof_to_node,1)) + @timeit timer "assemble_system_coo" I,J,V,b_partition = map(assemble_system_coo,state.local_states) |> tuple_of_arrays + @timeit timer "psparse" A = psparse(I,J,V,dof_partition,dof_partition;subassembled=true,indices=:local) |> fetch + row_partition = partition(axes(A,1)) + b = PVector(b_partition,dof_partition) + @timeit timer "assemble_b" b = assemble(b,row_partition) |> fetch A,b end -function solve_system(A,b,params) +function assemble_system_coo(state) + timer = state.timer + @timeit timer "assemble_sybmolic" args = assemble_sybmolic(state) + @timeit timer "assemble_numeric_cells!" assemble_numeric_cells!(args...,state) + @timeit timer "assemble_numeric_faces!" assemble_numeric_faces!(args...,state) + args +end + +function assemble_sybmolic(state) + cell_to_dofs = state.dofs.cell_to_dofs + + n_coo = 0 + ncells = length(cell_to_dofs) + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + for i in 1:ndofs + if !(dofs[i]>0) + continue + end + for j in 1:ndofs + if !(dofs[j]>0) + continue + end + n_coo += 1 + end + end + end + + I_coo = Vector{Int32}(undef,n_coo) + J_coo = Vector{Int32}(undef,n_coo) + V_coo = Vector{Float64}(undef,n_coo) + b = zeros(Float64,state.dofs.n_dofs) + + n_coo = 0 + for cell in 1:ncells + dofs = cell_to_dofs[cell] + ndofs = length(dofs) + for i in 1:ndofs + if !(dofs[i]>0) + continue + end + dofs_i = dofs[i] + for j in 1:ndofs + if !(dofs[j]>0) + continue + end + n_coo += 1 + I_coo[n_coo] = dofs_i + J_coo[n_coo] = dofs[j] + end + end + end + + I_coo,J_coo,V_coo,b +end + +function assemble_numeric_cells!(I_coo,J_coo,V_coo,b,state) + + cell_to_dofs = state.dofs.cell_to_dofs + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + f = state.user_funs.f + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + u_dirichlet = state.dirichlet_bcs.u_dirichlet + ncells = length(cell_to_nodes) + + # Allocate auxiliary buffers + ∇x = map(i->similar(i,size(i,2)),∇s) + Aes = map(i->zeros(size(i,2),size(i,2)),∇s) + fes = map(i->zeros(size(i,2)),∇s) + ∇st = map(m->collect(permutedims(m)),∇s) + st = map(m->collect(permutedims(m)),s) + + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + + i_coo = 0 + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + dofs = cell_to_dofs[cell] + Ae = Aes[rid] + fe = fes[rid] + nl = length(nodes) + ∇ste = ∇st[rid] + ∇xe = ∇x[rid] + ste = st[rid] + we = w[rid] + nq = length(we) + fill!(Ae,zero(eltype(Ae))) + fill!(fe,zero(eltype(Ae))) + # Integrate + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇ste[k,iq] + sqx = ste[k,iq] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + ∇xe[k] = invJt*∇ste[k,iq] + end + for j in 1:nl + ∇xej = ∇xe[j] + for i in 1:nl + Ae[i,j] += ∇xe[i]⋅∇xej*dV + end + end + fx = f(xint) + for k in 1:nl + fe[k] += fx*ste[k,iq]*dV + end + end + # Set the result in the output array + for i in 1:nl + if !(dofs[i]>0) + continue + end + b[dofs[i]] += fe[i] + for j in 1:nl + if !(dofs[j]>0) + uj = u_dirichlet[-dofs[j]] + b[dofs[i]] -= Ae[i,j]*uj + continue + end + i_coo += 1 + V_coo[i_coo] = Ae[i,j] + end + end + end + +end + +function assemble_numeric_faces!(I_coo,J_coo,V_coo,b,state) + + neum_face_to_face = state.neumann_bcs.neum_face_to_face + if length(neum_face_to_face) == 0 + return nothing + end + + face_to_rid = state.face_isomap.face_to_rid + face_to_dofs = state.dofs.face_to_dofs + face_to_nodes = state.face_isomap.face_to_nodes + ∇s_f = state.face_isomap.rid_to_shape_grads + s_f = state.face_isomap.rid_to_shape_vals + node_to_x = state.face_isomap.node_to_coords + g = state.user_funs.g + w_f = state.face_integration.rid_to_weights + d = state.cell_integration.d + + fes = map(i->zeros(size(i,2)),s_f) + Tx = SVector{d,Float64} + Ts = typeof(zero(SVector{d-1,Float64})*zero(Tx)') + + for face in neum_face_to_face + rid = face_to_rid[face] + nodes = face_to_nodes[face] + dofs = face_to_dofs[face] + ∇se = ∇s_f[rid] + se = s_f[rid] + we = w_f[rid] + nq = length(we) + fe = fes[rid] + fill!(fe,zero(eltype(fe))) + nl = length(nodes) + for iq in 1:nq + Jt = zero(Ts) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + J = transpose(Jt) + dS = sqrt(det(Jt*J))*we[iq] + gx = g(xint) + for k in 1:nl + fe[k] += gx*se[iq,k]*dS + end + end + for i in 1:nl + if !(dofs[i]>0) + continue + end + b[dofs[i]] += fe[i] + end + end +end + +function solve_system(A,b,params,state) + timer = state.timer solver = params[:solver] x = similar(b,axes(A,2)) - setup = solver.setup(x,A,b) - solver.solve!(x,setup,b) - solver.finalize!(setup) + @timeit timer "solver.setup" setup = solver.setup(x,A,b) + @timeit timer "solver.solve!" solver.solve!(x,setup,b) + @timeit timer "solver.finalize!" solver.finalize!(setup) x end function setup_uh(x,state) - dofs = axes(state.global_dof_to_node,1) + dofs = axes(state.dofs.global_dof_to_node,1) local_states = state.local_states - - # TODO this similar can be avoided when using - # a sub-assembled system global_dof_to_x = similar(x,dofs) global_dof_to_x .= x consistent!(global_dof_to_x) |> wait - - # TODO this map can be avoided - # when the local numeration of free nodes coincides - # with local dofs. - function map_dofs(dof_to_x,node_to_dof,state) - free_node_to_node = first(state.dirichlet_bcs.free_and_dirichlet_nodes) - free_node_to_dof = view(node_to_dof,free_node_to_node) - view(dof_to_x,free_node_to_dof) + function setup_local_uh(dof_to_x,state,node_to_dof) + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + free_node_to_node = first(free_and_dirichlet_nodes) + @views u_free = dof_to_x[node_to_dof[free_node_to_node]] + u_dirichlet = state.dirichlet_bcs.u_dirichlet + nnodes = length(node_to_x) + local_uh = zeros(nnodes) + local_uh[first(free_and_dirichlet_nodes)] = u_free + local_uh[last(free_and_dirichlet_nodes)] = u_dirichlet + local_uh end - free_node_to_x = map(map_dofs,partition(global_dof_to_x),partition(state.global_node_to_dof),state.local_states) - - uh = map(Example001.setup_uh,free_node_to_x,local_states) + uh = map(setup_local_uh,partition(global_dof_to_x),local_states,partition(state.dofs.global_node_to_dof)) uh end function integrate_error_norms(results,uh,state) local_states = state.local_states - eh1², el2² = map(Example001.integrate_error_norms_loop,uh,local_states) |> tuple_of_arrays + eh1², el2² = map(integrate_error_norms_loop,uh,local_states) |> tuple_of_arrays eh1 = sqrt(sum(eh1²)) el2 = sqrt(sum(el2²)) results[:eh1] = eh1 @@ -168,7 +498,78 @@ function integrate_error_norms(results,uh,state) results end +function integrate_error_norms_loop(uh,state) + + cell_to_nodes = state.cell_isomap.face_to_nodes + cell_to_rid = state.cell_isomap.face_to_rid + free_and_dirichlet_nodes = state.dirichlet_bcs.free_and_dirichlet_nodes + node_to_x = state.cell_isomap.node_to_coords + ∇s = state.cell_isomap.rid_to_shape_grads + s = state.cell_isomap.rid_to_shape_vals + u_dirichlet = state.dirichlet_bcs.u_dirichlet + u = state.user_funs.u + w = state.cell_integration.rid_to_weights + d = state.cell_integration.d + + eh1 = 0.0 + el2 = 0.0 + ncells = length(cell_to_rid) + ues = map(i->zeros(size(i,2)),∇s) + ∇x = map(i->similar(i,size(i,2)),∇s) + Tx = eltype(node_to_x) + TJ = typeof(zero(Tx)*zero(Tx)') + for cell in 1:ncells + rid = cell_to_rid[cell] + nodes = cell_to_nodes[cell] + ue = ues[rid] + nl = length(nodes) + ∇se = ∇s[rid] + ∇xe = ∇x[rid] + se = s[rid] + we = w[rid] + nq = length(we) + for k in 1:nl + nk = nodes[k] + ue[k] = uh[nk] + end + for iq in 1:nq + Jt = zero(TJ) + xint = zero(Tx) + for k in 1:nl + x = node_to_x[nodes[k]] + ∇sqx = ∇se[iq,k] + sqx = se[iq,k] + Jt += ∇sqx*x' + xint += sqx*x + end + detJt = det(Jt) + invJt = inv(Jt) + dV = abs(detJt)*we[iq] + for k in 1:nl + ∇xe[k] = invJt*∇se[iq,k] + end + ux = u(xint) + ∇ux = ForwardDiff.gradient(u,xint) + ∇uhx = zero(∇ux) + uhx = zero(ux) + for k in 1:nl + uek = ue[k] + ∇uhx += uek*∇xe[k] + uhx += uek*se[iq,k] + end + ∇ex = ∇ux - ∇uhx + ex = ux - uhx + eh1 += (∇ex⋅∇ex + ex*ex)*dV + el2 += ex*ex*dV + end + end + eh1, el2 +end + function export_results(uh,params,state) + if ! params[:export_vtu] + return nothing + end example_path = params[:example_path] mesh = params[:mesh] ranks = linear_indices(gk.node_partition(mesh)) diff --git a/extensions/GalerkinToolkitExamples/test/example002_defs.jl b/extensions/GalerkinToolkitExamples/test/example002_defs.jl new file mode 100644 index 00000000..4e6eb1c5 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/example002_defs.jl @@ -0,0 +1,39 @@ + +import GalerkinToolkit as gk +using GalerkinToolkitExamples: Example002 +using Test +using PartitionedArrays +using PetscCall + +function example002_tests_np_4(distribute) + tol = 1.0e-8 + params = Dict{Symbol,Any}() + domain = (0,10,0,10) + cells_per_dir = (20,20) + parts_per_dir = (2,2) + np = prod(parts_per_dir) + parts = distribute(LinearIndices((np,))) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + params[:mesh] = mesh + results = Example002.main(params) + results = Example002.main(params) + @test results[:eh1] < tol + @test results[:el2] < tol + + options = "-pc_type gamg -ksp_type cg -ksp_error_if_not_converged true -ksp_converged_reason -ksp_rtol 1.0e-6" + PetscCall.init(args=split(options)) + params = Dict{Symbol,Any}() + domain = (0,10,0,10,0,10) + cells_per_dir = (40,40,40) + parts_per_dir = (2,2,1) + np = prod(parts_per_dir) + parts = distribute(LinearIndices((np,))) + ghost_layers = 0 + mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) + params[:mesh] = mesh + params[:export_vtu] = false + params[:solver] = Example002.ksp_solver() + results = Example002.main(params) + results = Example002.main(params) +end diff --git a/extensions/GalerkinToolkitExamples/test/example002_tests.jl b/extensions/GalerkinToolkitExamples/test/example002_tests.jl index 27149066..c6cc72cb 100644 --- a/extensions/GalerkinToolkitExamples/test/example002_tests.jl +++ b/extensions/GalerkinToolkitExamples/test/example002_tests.jl @@ -1,28 +1,6 @@ module Example002Tests -import GalerkinToolkit as gk -using GalerkinToolkitExamples: Example002 -using Test -using PartitionedArrays - -tol = 1.0e-10 - -params = Dict{Symbol,Any}() -results = Example002.main(params) -@test results[:eh1] < tol -@test results[:el2] < tol - -domain = (0,10,0,10) -cells_per_dir = (20,20) -parts_per_dir = (2,2) -np = prod(parts_per_dir) -parts = DebugArray(LinearIndices((np,))) -ghost_layers = 0 -mesh = gk.cartesian_mesh(domain,cells_per_dir,parts_per_dir;parts,ghost_layers) - -params = Dict{Symbol,Any}() -results = Example002.main(params) -@test results[:eh1] < tol -@test results[:el2] < tol +include("example002_defs.jl") +with_debug(example002_tests_np_4) end # module diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/example002_driver_np_4.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/example002_driver_np_4.jl new file mode 100644 index 00000000..e29ef5c9 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/example002_driver_np_4.jl @@ -0,0 +1,8 @@ +module TMP + +using PartitionedArrays + +include(joinpath("..","example002_defs.jl")) +with_mpi(example002_tests_np_4) + +end # module diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/example002_tests.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/example002_tests.jl new file mode 100644 index 00000000..897646a6 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/example002_tests.jl @@ -0,0 +1,11 @@ +module Example002Tests + +using MPI + +include("run_mpi_driver.jl") + +file = joinpath(@__DIR__,"example002_driver_np_4.jl") +run_mpi_driver(file;procs=4) + +end # module + diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/run_mpi_driver.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/run_mpi_driver.jl new file mode 100644 index 00000000..0d85dcaf --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/run_mpi_driver.jl @@ -0,0 +1,15 @@ +using MPI +using Test +function run_mpi_driver(file;procs) + repodir = normpath(joinpath(@__DIR__,"..","..")) + mpiexec() do cmd + if MPI.MPI_LIBRARY == "OpenMPI" || (isdefined(MPI, :OpenMPI) && MPI.MPI_LIBRARY == MPI.OpenMPI) + run(`$cmd -n $procs --oversubscribe $(Base.julia_cmd()) --project=$repodir $(file)`) + else + run(`$cmd -n $procs $(Base.julia_cmd()) --project=$repodir $(file)`) + end + # This line will be reached if and only if the command launched by `run` runs without errors. + # Then, if we arrive here, the test has succeeded. + @test true + end +end diff --git a/extensions/GalerkinToolkitExamples/test/mpi_array/runtests.jl b/extensions/GalerkinToolkitExamples/test/mpi_array/runtests.jl new file mode 100644 index 00000000..c7602f63 --- /dev/null +++ b/extensions/GalerkinToolkitExamples/test/mpi_array/runtests.jl @@ -0,0 +1,9 @@ + +module MPIArrayRunTests + +using Test +using PartitionedArrays + +@testset "example002" begin include("example002_tests.jl") end + +end #module diff --git a/extensions/GalerkinToolkitExamples/test/runtests.jl b/extensions/GalerkinToolkitExamples/test/runtests.jl index b448524c..c7d1e6a4 100644 --- a/extensions/GalerkinToolkitExamples/test/runtests.jl +++ b/extensions/GalerkinToolkitExamples/test/runtests.jl @@ -7,4 +7,6 @@ using Test @testset "example002" begin include("example002_tests.jl") end end +include(joinpath("mpi_array","runtests.jl")) + end # module