diff --git a/.gitignore b/.gitignore
index 91a3e882..4f9205f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 /dev/
 /docs/build/
 /docs/site/
+/docs/Manifest.toml
 /tmp/
 *.vtu
 *.pvtu
diff --git a/NEWS.md b/NEWS.md
index 9d399950..649e540e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
-### Added
+### Added
+
+- Added support for distributed block assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- Added the possibility to use `OwnAndGhostVectors` as the vector partition for `FESpace` dofs. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- Implemented `BlockPArray <: AbstractBlockArray`, a new type that behaves as a `BlockArray{PArray}` and fulfills the APIs of both `PArray` and `AbstractBlockArray`. This new type is used to implement distributed block assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- `DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}` now has a `BlockPRange` as gids and a `BlockPVector` as vector type. This keeps the FESpace and system vectors consistent, which in turn avoids memory allocations/copies when transferring between the FESpace and linear-system layouts. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+
+### Changed
+
+- Merged the functionality of `consistent_local_views` and `change_ghost`. `consistent_local_views` has been removed. `change_ghost` now has two keyword arguments, `is_consistent` and `make_consistent`, that cover all use cases. `change_ghost` has also been optimized to avoid unnecessary allocations whenever possible. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+
+## [0.3.1] - 2023-10-01
+
+### Added
 
 - Added missing _get_cell_dof_ids_inner_space() method overload. Since PR[130](https://github.com/gridap/GridapDistributed.jl/pull/130).
 - Added missing remove_ghost_cells() overload for AdaptiveTriangulation. Since PR[131](https://github.com/gridap/GridapDistributed.jl/pull/131).
diff --git a/Project.toml b/Project.toml
index 3d4c7ab4..62fbbf10 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@
authors = ["S. Badia ", "A. F. Martin "GridapDistributed.md",
   "Algebra" => "Algebra.md",
   "CellData" => "CellData.md",
-  "DivConformingFESpaces" => "DivConformingFESpaces.md",
   "FESpaces" => "FESpaces.md",
   "Geometry" => "Geometry.md",
   "MultiField" => "MultiField.md",
   "Visualization" => "Visualization.md",
+  "Adaptivity" => "Adaptivity.md",
 ]
-
 makedocs(;
   modules=[GridapDistributed],
   format=Documenter.HTML(),
   pages=pages,
   repo="https://github.com/gridap/GridapDistributed.jl/blob/{commit}{path}#L{line}",
   sitename="GridapDistributed.jl",
-  authors="S. Badia , A. F. Martin , F. Verdugo ",
+  authors="S. Badia , A. F. 
Verdugo ", + # warnonly=true, # for debugging ) deploydocs(; diff --git a/docs/src/Adaptivity.md b/docs/src/Adaptivity.md new file mode 100644 index 00000000..56ef09eb --- /dev/null +++ b/docs/src/Adaptivity.md @@ -0,0 +1,6 @@ +# Adaptivity + +```@autodocs +Modules = [GridapDistributed] +Pages = ["Adaptivity.jl"] +``` diff --git a/docs/src/DivConformingFESpaces.md b/docs/src/DivConformingFESpaces.md deleted file mode 100644 index 1a4419e9..00000000 --- a/docs/src/DivConformingFESpaces.md +++ /dev/null @@ -1,6 +0,0 @@ -# DivConformingFESpaces - -```@autodocs -Modules = [GridapDistributed] -Pages = ["DivConformingFESpaces.jl"] -``` \ No newline at end of file diff --git a/docs/src/FESpaces.md b/docs/src/FESpaces.md index 83fc837c..37f10693 100644 --- a/docs/src/FESpaces.md +++ b/docs/src/FESpaces.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["FESpaces.jl"] +Pages = ["FESpaces.jl","DivConformingFESpaces.jl"] ``` \ No newline at end of file diff --git a/docs/src/MultiField.md b/docs/src/MultiField.md index 4b7c2318..f1472582 100644 --- a/docs/src/MultiField.md +++ b/docs/src/MultiField.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["MultiField.jl"] -``` \ No newline at end of file +Pages = ["MultiField.jl","BlockPartitionedArrays.jl"] +``` diff --git a/src/Adaptivity.jl b/src/Adaptivity.jl index 0045fc63..1ce0d7ff 100644 --- a/src/Adaptivity.jl +++ b/src/Adaptivity.jl @@ -22,6 +22,7 @@ end RedistributeGlue Glue linking two distributions of the same mesh. + - `new_parts`: Array with the new part IDs (and comms) - `old_parts`: Array with the old part IDs (and comms) - `parts_rcv`: Array with the part IDs from which each part receives diff --git a/src/Algebra.jl b/src/Algebra.jl index 52d07d57..e8fedd02 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -1,3 +1,14 @@ + +# Vector allocation + +function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} + PVector{V}(undef,partition(ids)) +end + +function Algebra.allocate_vector(::Type{<:BlockPVector{V}},ids::BlockPRange) where {V} + BlockPVector{V}(undef,ids) +end + # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.ArrayCounter,axes) @notimplemented @@ -5,17 +16,36 @@ end # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.CounterCOO{T,A}, axes::A) where {T,A} - b=Algebra.CounterCOO{T}(axes) + b = Algebra.CounterCOO{T}(axes) b.nnz = a.nnz b end # This might go to Gridap in the future. We keep it here for the moment. 
function change_axes(a::Algebra.AllocationCOO{T,A}, axes::A) where {T,A} - counter=change_axes(a.counter,axes) + counter = change_axes(a.counter,axes) Algebra.AllocationCOO(counter,a.I,a.J,a.V) end +# Array of PArrays -> PArray of Arrays +function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + PartitionedArrays.getany(aj) + end + end +end + +function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + aj.items[i] + end + end +end + # This type is required because MPIArray from PArrays # cannot be instantiated with a NULL communicator struct MPIVoidVector{T} <: AbstractVector{T} @@ -112,8 +142,8 @@ function local_views(a) @abstractmethod end -function get_parts(x) - return linear_indices(local_views(x)) +function get_parts(a) + return linear_indices(local_views(a)) end function local_views(a::AbstractVector,rows) @@ -124,24 +154,56 @@ function local_views(a::AbstractMatrix,rows,cols) @notimplemented end -function consistent_local_views(a,ids,isconsistent) - @abstractmethod +local_views(a::AbstractArray) = a +local_views(a::PRange) = partition(a) +local_views(a::PVector) = partition(a) +local_views(a::PSparseMatrix) = partition(a) + +function local_views(a::BlockPRange) + map(blocks(a)) do a + local_views(a) + end |> to_parray_of_arrays end -function local_views(a::AbstractArray) - a +function local_views(a::BlockPArray) + vals = map(blocks(a)) do a + local_views(a) + end |> to_parray_of_arrays + return map(mortar,vals) +end + +# change_ghost + +function change_ghost(a::PVector{T},ids::PRange;is_consistent=false,make_consistent=false) where T + same_partition = (a.index_partition === partition(ids)) + a_new = same_partition ? 
a : change_ghost(T,a,ids) + if make_consistent && (!same_partition || !is_consistent) + consistent!(a_new) |> wait + end + return a_new end -function local_views(a::PRange) - partition(a) +function change_ghost(::Type{<:AbstractVector},a::PVector,ids::PRange) + a_new = similar(a,eltype(a),(ids,)) + # Equivalent to copy!(a_new,a) but does not check that owned indices match + map(copy!,own_values(a_new),own_values(a)) + return a_new end -function local_views(a::PVector) - partition(a) +function change_ghost(::Type{<:OwnAndGhostVectors},a::PVector,ids::PRange) + values = map(own_values(a),partition(ids)) do own_vals,ids + ghost_vals = fill(zero(eltype(a)),ghost_length(ids)) + perm = PartitionedArrays.local_permutation(ids) + OwnAndGhostVectors(own_vals,ghost_vals,perm) + end + return PVector(values,partition(ids)) end -function local_views(a::PSparseMatrix) - partition(a) +function change_ghost(a::BlockPVector,ids::BlockPRange;is_consistent=false,make_consistent=false) + vals = map(blocks(a),blocks(ids)) do a, ids + change_ghost(a,ids;is_consistent=is_consistent,make_consistent=make_consistent) + end + return BlockPVector(vals,ids) end # This function computes a mapping among the local identifiers of a and b @@ -199,71 +261,46 @@ function _lid_to_plid(lid,lid_to_plid) plid end -function local_views(row_partitioned_vector::PVector,test_dofs_partition::PRange) - if row_partitioned_vector.index_partition === partition(test_dofs_partition) - @assert false +function local_views(a::PVector,new_rows::PRange) + old_rows = axes(a,1) + if partition(old_rows) === partition(new_rows) + partition(a) else - map(partition(row_partitioned_vector), - partition(test_dofs_partition), - row_partitioned_vector.index_partition) do vector_partition,dofs_partition,row_partition - LocalView(vector_partition,(find_local_to_local_map(dofs_partition,row_partition),)) + map(partition(a),partition(old_rows),partition(new_rows)) do vector_partition,old_rows,new_rows + LocalView(vector_partition,(find_local_to_local_map(new_rows,old_rows),)) end end end -function local_views(row_col_partitioned_matrix::PSparseMatrix, - test_dofs_partition::PRange, - trial_dofs_partition::PRange) - if (row_col_partitioned_matrix.row_partition === partition(test_dofs_partition) || - row_col_partitioned_matrix.col_partition === partition(trial_dofs_partition) ) - @assert false - else - map( - partition(row_col_partitioned_matrix), - partition(test_dofs_partition), - partition(trial_dofs_partition), - row_col_partitioned_matrix.row_partition, - row_col_partitioned_matrix.col_partition) do matrix_partition, - test_dof_partition, - trial_dof_partition, - row_partition, - col_partition - rl2lmap = find_local_to_local_map(test_dof_partition,row_partition) - cl2lmap = find_local_to_local_map(trial_dof_partition,col_partition) - LocalView(matrix_partition,(rl2lmap,cl2lmap)) - end - end -end - -function change_ghost(a::PVector,ids_fespace::PRange) - if a.index_partition === partition(ids_fespace) - a_fespace = a +function local_views(a::PSparseMatrix,new_rows::PRange,new_cols::PRange) + old_rows, old_cols = axes(a) + if (partition(old_rows) === partition(new_rows) && partition(old_cols) === partition(new_cols) ) + partition(a) else - a_fespace = similar(a,eltype(a),(ids_fespace,)) - a_fespace .= a + map(partition(a), + partition(old_rows),partition(old_cols), + partition(new_rows),partition(new_cols)) do matrix_partition,old_rows,old_cols,new_rows,new_cols + rl2lmap = find_local_to_local_map(new_rows,old_rows) + cl2lmap = 
find_local_to_local_map(new_cols,old_cols) + LocalView(matrix_partition,(rl2lmap,cl2lmap)) + end end - a_fespace end -function consistent_local_views(a::PVector, - ids_fespace::PRange, - isconsistent) - a_fespace = change_ghost(a,ids_fespace) - if ! isconsistent - fetch_vector_ghost_values!(partition(a_fespace), - map(reverse,a_fespace.cache)) |> wait - end - partition(a_fespace) +function local_views(a::BlockPVector,new_rows::BlockPRange) + vals = map(local_views,blocks(a),blocks(new_rows)) |> to_parray_of_arrays + return map(mortar,vals) end -function Algebra.allocate_vector(::Type{<:PVector{V,A}},ids::PRange) where {V,A} - values = map(partition(ids)) do ids - Tv = eltype(A) - Tv(undef,length(local_to_owner(ids))) - end - PVector(values,partition(ids)) +function local_views(a::BlockPMatrix,new_rows::BlockPRange,new_cols::BlockPRange) + vals = map(CartesianIndices(blocksize(a))) do I + local_views(a[Block(I)],new_rows[Block(I[1])],new_cols[Block(I[2])]) + end |> to_parray_of_arrays + return map(mortar,vals) end +# PSparseMatrix assembly + struct FullyAssembledRows end struct SubAssembledRows end @@ -287,6 +324,12 @@ function Algebra.nz_counter( DistributedCounterCOO(builder.par_strategy,counters,test_dofs_gids_prange,trial_dofs_gids_prange) end +function Algebra.get_array_type(::PSparseMatrixBuilderCOO{Tv}) where Tv + T = eltype(Tv) + return PSparseMatrix{T} +end + + """ """ struct DistributedCounterCOO{A,B,C,D} <: GridapType @@ -322,7 +365,7 @@ function Algebra.nz_allocation(a::DistributedCounterCOO) DistributedAllocationCOO(a.par_strategy,allocs,a.test_dofs_gids_prange,a.trial_dofs_gids_prange) end -struct DistributedAllocationCOO{A,B,C,D} <:GridapType +struct DistributedAllocationCOO{A,B,C,D} <: GridapType par_strategy::A allocs::B test_dofs_gids_prange::C @@ -342,13 +385,23 @@ end function change_axes(a::DistributedAllocationCOO{A,B,<:PRange,<:PRange}, axes::Tuple{<:PRange,<:PRange}) where {A,B} - local_axes=map(partition(axes[1]),partition(axes[2])) do rows,cols + local_axes = map(partition(axes[1]),partition(axes[2])) do rows,cols (Base.OneTo(local_length(rows)), Base.OneTo(local_length(cols))) end - allocs=map(change_axes,a.allocs,local_axes) + allocs = map(change_axes,a.allocs,local_axes) DistributedAllocationCOO(a.par_strategy,allocs,axes[1],axes[2]) end +function change_axes(a::MatrixBlock{<:DistributedAllocationCOO}, + axes::Tuple{<:Vector,<:Vector}) + block_ids = CartesianIndices(a.array) + rows, cols = axes + array = map(block_ids) do I + change_axes(a[I],(rows[I[1]],cols[I[2]])) + end + return ArrayBlock(array,a.touched) +end + function local_views(a::DistributedAllocationCOO) a.allocs end @@ -359,23 +412,28 @@ function local_views(a::DistributedAllocationCOO,test_dofs_gids_prange,trial_dof a.allocs end -function first_gdof_from_ids(ids) - lid_to_gid=local_to_global(ids) - owner_to_lid=own_to_local(ids) - own_length(ids)>0 ? 
Int(lid_to_gid[first(owner_to_lid)]) : 1 +function local_views(a::MatrixBlock{<:DistributedAllocationCOO}) + array = map(local_views,a.array) |> to_parray_of_arrays + return map(ai -> ArrayBlock(ai,a.touched),array) end -function find_gid_and_owner(ighost_to_jghost,jindices) - jghost_to_local=ghost_to_local(jindices) - jlocal_to_global=local_to_global(jindices) - jlocal_to_owner=local_to_owner(jindices) - ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) +function get_allocations(a::DistributedAllocationCOO) + I,J,V = map(local_views(a)) do alloc + alloc.I, alloc.J, alloc.V + end |> tuple_of_arrays + return I,J,V +end - ighost_to_global = jlocal_to_global[ighost_to_jlocal] - ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] - ighost_to_global, ighost_to_owner +function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) + tuple_of_array_of_parrays = map(get_allocations,a.array) |> tuple_of_arrays + return tuple_of_array_of_parrays end +get_test_gids(a::DistributedAllocationCOO) = a.test_dofs_gids_prange +get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange +get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_test_gids,diag(a.array)) +get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_trial_gids,diag(a.array)) + function Algebra.create_from_nz(a::PSparseMatrix) # For FullyAssembledRows the underlying Exchanger should # not have ghost layer making assemble! do nothing (TODO check) @@ -386,218 +444,76 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows}) f(x) = nothing A, = _fa_create_from_nz_with_callback(f,a) - A + return A end -# The given ids are assumed to be a sub-set of the lids -function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) - i = 0 - ghost_lids_touched = fill(false,ghost_length(a)) - glo_to_loc=global_to_local(a) - loc_to_gho=local_to_ghost(a) - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - i += 1 - end - end - gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) - i = 0 - ghost_lids_touched .= false - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - i += 1 - gids_ghost_lid_to_ghost_lid[i] = ghost_lid - end - end - gids_ghost_lid_to_ghost_lid +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) + f(x) = nothing + A, = _fa_create_from_nz_with_callback(f,a) + return A end -# Find the neighbours of partition1 trying -# to use those in partition2 as a hint -function _find_neighbours!(partition1, partition2) - partition2_snd, partition2_rcv = assembly_neighbors(partition2) - partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) - assembly_neighbors(partition1; neighbors=partition2_graph) -end - function _fa_create_from_nz_with_callback(callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngcdofs = length(trial_dofs_gids_prange) - nocdofs = map(own_length,trial_dofs_gids_partition) - - rows = _setup_prange_rows_without_ghosts(test_dofs_gids_prange) + I,J,V = get_allocations(a) + 
test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) + rows = _setup_prange(test_dofs_gids_prange,I;ghost=false,ax=:rows) b = callback(rows) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the range for cols - # Note that we are calling here the _setup_rows_prange(...) function even though we - # are setting up the range for the cols. Inherent to the FullyAssembledRows() - # assembly strategy with a single layer of ghost cells - # is the fact that "The global column identifiers of matrix entries - # located in rows that a given processor owns have to be such they belong to the set of - # ghost DoFs in the local partition of the FE space corresponding to such processor." - # Any entry in the global matrix sparsity pattern that fulfills this condition is just - # ignored in the FullyAssembledRows() assembly strategy with a single layer of ghost cells - cols = _setup_rows_prange(trial_dofs_gids_prange,J) - + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress local portions - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + A = _setup_matrix(values,rows,cols) + return A, b end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) f(x) = nothing A, = _sa_create_from_nz_with_callback(f,f,a) - A + return A end -function _generate_column_owner(J,trial_dofs_partition) - map(J,trial_dofs_partition) do J, indices - glo_to_loc=global_to_local(indices) - loc_to_own=local_to_owner(indices) - map(x->loc_to_own[glo_to_loc[x]], J) - end -end - -function assemble_coo_with_column_owner!(I,J,Jown,V,row_partition) - """ - Returns three JaggedArrays with the coo triplets - to be sent to the corresponding owner parts in parts_snd - """ - function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) - global_to_local_row = global_to_local(row_lids) - local_row_to_owner = local_to_owner(row_lids) - owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) - ptrs = zeros(Int32,length(parts_snd)+1) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - ptrs[owner_to_i[owner]+1] +=1 - end - end - PArrays.length_to_ptrs!(ptrs) - gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) - gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) - jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) - v_snd_data = zeros(eltype(k_v),ptrs[end]-1) - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - gj = k_gj[k] - v = k_v[k] - p = ptrs[owner_to_i[owner]] - gi_snd_data[p] = gi - gj_snd_data[p] = gj - jo_snd_data[p] = k_jo[k] - v_snd_data[p] = v - k_v[k] = zero(v) - ptrs[owner_to_i[owner]] += 1 - end - end - PArrays.rewind_ptrs!(ptrs) - gi_snd = JaggedArray(gi_snd_data,ptrs) - gj_snd = JaggedArray(gj_snd_data,ptrs) - jo_snd 
= JaggedArray(jo_snd_data,ptrs) - v_snd = JaggedArray(v_snd_data,ptrs) - gi_snd, gj_snd, jo_snd, v_snd - end - """ - Pushes to coo_entries_with_column_owner the tuples - gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes - """ - function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - current_n = length(k_gi) - new_n = current_n + length(gi_rcv.data) - resize!(k_gi,new_n) - resize!(k_gj,new_n) - resize!(k_jo,new_n) - resize!(k_v,new_n) - for p in 1:length(gi_rcv.data) - k_gi[current_n+p] = gi_rcv.data[p] - k_gj[current_n+p] = gj_rcv.data[p] - k_jo[current_n+p] = jo_rcv.data[p] - k_v[current_n+p] = v_rcv.data[p] - end - end - part = linear_indices(row_partition) - parts_snd, parts_rcv = assembly_neighbors(row_partition) - coo_entries_with_column_owner = map(tuple,I,J,Jown,V) - gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays - graph = ExchangeGraph(parts_snd,parts_rcv) - t1 = exchange(gi_snd,graph) - t2 = exchange(gj_snd,graph) - t3 = exchange(jo_snd,graph) - t4 = exchange(v_snd,graph) - @async begin - gi_rcv = fetch(t1) - gj_rcv = fetch(t2) - jo_rcv = fetch(t3) - v_rcv = fetch(t4) - map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - I,J,Jown,V - end +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) + f(x) = nothing + A, = _sa_create_from_nz_with_callback(f,f,a) + return A end - function _sa_create_from_nz_with_callback(callback,async_callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngrdofs = length(test_dofs_gids_prange) - ngcdofs = length(test_dofs_gids_prange) + I,J,V = get_allocations(a) + test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the Prange for the rows - rows = _setup_rows_prange(test_dofs_gids_prange,I) + rows = _setup_prange(test_dofs_gids_prange,I;ax=:rows) # Move (I,J,V) triplets to the owner process of each row I. 
# The triplets are accompanyed which Jo which is the process column owner - Jo=_generate_column_owner(J,trial_dofs_gids_partition) - t=assemble_coo_with_column_owner!(I,J,Jo,V,partition(rows)) + Jo = get_gid_owners(J,trial_dofs_gids_prange;ax=:cols) + t = _assemble_coo!(I,J,V,rows;owners=Jo) # Here we can overlap computations # This is a good place to overlap since @@ -608,32 +524,34 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) wait(t) # Create the Prange for the cols - cols = _setup_cols_prange(trial_dofs_gids_prange,J,Jo) + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols,owners=Jo) # Overlap rhs communications with CSC compression t2 = async_callback(b) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress the local matrices - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Wait the transfer to finish - if t2 !== nothing + if !isa(t2,Nothing) wait(t2) end # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + A = _setup_matrix(values,rows,cols) + return A, b end + +# PVector assembly + struct PVectorBuilder{T,B} local_vector_type::Type{T} par_strategy::B @@ -649,6 +567,11 @@ function Algebra.nz_counter(builder::PVectorBuilder,axs::Tuple{<:PRange}) PVectorCounter(builder.par_strategy,counters,rows) end +function Algebra.get_array_type(::PVectorBuilder{Tv}) where Tv + T = eltype(Tv) + return PVector{T} +end + struct PVectorCounter{A,B,C} par_strategy::A counters::B @@ -687,91 +610,8 @@ function local_views(a::PVectorAllocationTrackOnlyValues,rows) a.values end -# Create PRange for the rows of the linear system -# without local ghost dofs as per required in the -# FullyAssembledRows() parallel assembly strategy -function _setup_prange_rows_without_ghosts(test_dofs_gids_prange) - ngdofs = length(test_dofs_gids_prange) - test_dofs_gids_partition = partition(test_dofs_gids_prange) - nodofs = map(own_length,test_dofs_gids_partition) - rindices=map(test_dofs_gids_partition) do dofs_indices - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,Int64[],Int32[]) - OwnAndGhostIndices(own_indices,ghost_indices) - end - PRange(rindices) -end - -function _setup_rows_prange(test_dofs_gids_prange,I) - ngdofs = length(test_dofs_gids_prange) - test_dofs_gids_partition = partition(test_dofs_gids_prange) - I_ghost_lids_to_test_dofs_ghost_lids = map(ghost_lids_touched,test_dofs_gids_partition,I) - _setup_rows_prange_impl_(ngdofs,I_ghost_lids_to_test_dofs_ghost_lids,test_dofs_gids_partition) -end - -function _setup_rows_prange_impl_(ngdofs,I_ghost_lids_to_dofs_ghost_lids,test_dofs_gids_partition) - gids_ghost_to_global, gids_ghost_to_owner = map( - find_gid_and_owner,I_ghost_lids_to_dofs_ghost_lids,test_dofs_gids_partition) |> tuple_of_arrays - - indices=map(test_dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) - OwnAndGhostIndices(own_indices,ghost_indices) - end - # Here we are assuming that the sparse communication 
graph underlying test_dofs_gids_partition - # is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. - # The precondition required for the consistency of any parallel assembly process in GridapDistributed - # is that each processor can determine locally with a single layer of ghost cells the global indices and associated - # processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities - # (i.e., either cells or faces). - _find_neighbours!(indices, test_dofs_gids_partition) - PRange(indices) -end - -function _setup_cols_prange(trial_dofs_gids_prange,J,Jown) - ngdofs = length(trial_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - - J_ghost_to_global, J_ghost_to_owner = map(J,Jown,trial_dofs_gids_partition) do J, Jown, trial_dofs_gids_partition - ghost_touched=Dict{Int,Bool}() - ghost_to_global=Int64[] - ghost_to_owner=Int64[] - me=part_id(trial_dofs_gids_partition) - for (j,jo) in zip(J,Jown) - if jo!=me - if !haskey(ghost_touched,j) - push!(ghost_to_global,j) - push!(ghost_to_owner,jo) - ghost_touched[j]=true - end - end - end - ghost_to_global, ghost_to_owner - end |> tuple_of_arrays - - indices=map(trial_dofs_gids_partition, - J_ghost_to_global, - J_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) - OwnAndGhostIndices(own_indices,ghost_indices) - end - # Here we cannot assume that the sparse communication graph underlying - # trial_dofs_gids_partition is a superset of the one underlying indices. - # Here we chould check whether it is included and call _find_neighbours!() - # if this is the case. At present, we are not taking advantage of this, - # but let the parallel scalable algorithm to compute the reciprocal to do the - # job. 
- PRange(indices) -end - function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) - rows = _setup_prange_rows_without_ghosts(a.test_dofs_gids_prange) + rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) _rhs_callback(a,rows) end @@ -818,7 +658,7 @@ end function Algebra.create_from_nz(a::PVector) assemble!(a) |> wait - a + return a end function Algebra.create_from_nz( @@ -830,7 +670,7 @@ function Algebra.create_from_nz( end A,b = _fa_create_from_nz_with_callback(callback,a) - A,b + return A,b end struct PVectorAllocationTrackTouchedAndValues{A,B,C} @@ -853,7 +693,7 @@ function Algebra.create_from_nz( end A,b = _sa_create_from_nz_with_callback(callback,async_callback,a) - A,b + return A,b end struct ArrayAllocationTrackTouchedAndValues{A} @@ -877,7 +717,7 @@ end if !(a.touched[i]) a.touched[i]=true end - if v!=nothing + if !isa(v,Nothing) a.values[i]=c(v,a.values[i]) end end @@ -887,7 +727,7 @@ end @notimplemented end @inline function Arrays.add_entries!(c::Function,a::ArrayAllocationTrackTouchedAndValues,v,i) - if v != nothing + if !isa(v,Nothing) for (ve,ie) in zip(v,i) Arrays.add_entry!(c,a,ve,ie) end @@ -901,11 +741,11 @@ end function Arrays.nz_allocation(a::DistributedCounterCOO{<:SubAssembledRows}, b::PVectorCounter{<:SubAssembledRows}) - A = nz_allocation(a) - dofs = b.test_dofs_gids_prange + A = nz_allocation(a) + dofs = b.test_dofs_gids_prange values = map(nz_allocation,b.counters) - B=PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) - A,B + B = PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) + return A,B end function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) @@ -914,10 +754,10 @@ function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) touched = map(values) do values fill!(Vector{Bool}(undef,length(values)),false) end - allocations=map(values,touched) do values,touched + allocations = map(values,touched) do values,touched ArrayAllocationTrackTouchedAndValues(touched,values) end - PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) + return PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) end function local_views(a::PVectorAllocationTrackTouchedAndValues) @@ -926,36 +766,385 @@ end function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) test_dofs_prange = a.test_dofs_gids_prange # dof ids of the test space - test_dofs_prange_partition = partition(test_dofs_prange) ngrdofs = length(test_dofs_prange) - + # Find the ghost rows - I_ghost_lids_to_dofs_ghost_lids=map(local_views(a.allocations),test_dofs_prange_partition) do allocation, indices - dofs_lids_touched=findall(allocation.touched) + allocations = local_views(a.allocations) + indices = partition(test_dofs_prange) + I_ghost_lids_to_dofs_ghost_lids = map(allocations, indices) do allocation, indices + dofs_lids_touched = findall(allocation.touched) loc_to_gho = local_to_ghost(indices) n_I_ghost_lids = count((x)->loc_to_gho[x]!=0,dofs_lids_touched) I_ghost_lids = Vector{Int32}(undef,n_I_ghost_lids) - cur=1 + cur = 1 for lid in dofs_lids_touched - dof_lid=loc_to_gho[lid] - if dof_lid!=0 - I_ghost_lids[cur]=dof_lid - cur=cur+1 + dof_lid = loc_to_gho[lid] + if dof_lid != 0 + I_ghost_lids[cur] = dof_lid + cur = cur+1 end end I_ghost_lids end - rows = _setup_rows_prange_impl_(ngrdofs, - I_ghost_lids_to_dofs_ghost_lids, - test_dofs_prange_partition) + gids_ghost_to_global, gids_ghost_to_owner = map( + find_gid_and_owner,I_ghost_lids_to_dofs_ghost_lids,indices) |> tuple_of_arrays + + rows = 
_setup_prange_impl_(ngrdofs,indices,gids_ghost_to_global,gids_ghost_to_owner) + b = _rhs_callback(a,rows) + t2 = assemble!(b) + + # Wait the transfer to finish + if t2 !== nothing + wait(t2) + end + return b +end + + +# Common Assembly Utilities + +function first_gdof_from_ids(ids) + if own_length(ids) == 0 + return 1 + end + lid_to_gid = local_to_global(ids) + owned_to_lid = own_to_local(ids) + return Int(lid_to_gid[first(owned_to_lid)]) +end + +function find_gid_and_owner(ighost_to_jghost,jindices) + jghost_to_local = ghost_to_local(jindices) + jlocal_to_global = local_to_global(jindices) + jlocal_to_owner = local_to_owner(jindices) + ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) + + ighost_to_global = jlocal_to_global[ighost_to_jlocal] + ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] + return ighost_to_global, ighost_to_owner +end + +# The given ids are assumed to be a sub-set of the lids +function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) + glo_to_loc = global_to_local(a) + loc_to_gho = local_to_ghost(a) + + # First pass: Allocate + i = 0 + ghost_lids_touched = fill(false,ghost_length(a)) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + i += 1 + end + end + gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) + + # Second pass: fill + i = 1 + fill!(ghost_lids_touched,false) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + gids_ghost_lid_to_ghost_lid[i] = ghost_lid + i += 1 + end + end + + return gids_ghost_lid_to_ghost_lid +end + +# Find the neighbours of partition1 trying +# to use those in partition2 as a hint +function _find_neighbours!(partition1, partition2) + partition2_snd, partition2_rcv = assembly_neighbors(partition2) + partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) + return assembly_neighbors(partition1; neighbors=partition2_graph) +end + +# to_global! & to_local! analogs, needed for dispatching in block assembly + +function to_local_indices!(I,ids::PRange;kwargs...) + map(to_local!,I,partition(ids)) +end + +function to_global_indices!(I,ids::PRange;kwargs...) + map(to_global!,I,partition(ids)) +end + +function get_gid_owners(I,ids::PRange;kwargs...) + map(I,partition(ids)) do I, indices + glo_to_loc = global_to_local(indices) + loc_to_own = local_to_owner(indices) + map(x->loc_to_own[glo_to_loc[x]], I) + end +end + +for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] + @eval begin + function $f(I::Vector,ids::AbstractVector{<:PRange};kwargs...) + map($f,I,ids) + end + + function $f(I::Matrix,ids::AbstractVector{<:PRange};ax=:rows) + @check ax ∈ [:rows,:cols] + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if ax == :rows + $f(I[i,j],ids[i]) + else + $f(I[i,j],ids[j]) + end + end + end + end +end + +# _setup_matrix : local matrices + global PRanges -> Global matrix + +function _setup_matrix(values,rows::PRange,cols::PRange) + return PSparseMatrix(values,partition(rows),partition(cols)) +end + +function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) + block_ids = CartesianIndices((length(rows),length(cols))) + block_mats = map(block_ids) do I + block_values = map(v -> blocks(v)[I],values) + return _setup_matrix(block_values,rows[I[1]],cols[I[2]]) + end + return mortar(block_mats) +end + +# _assemble_coo! 
: local coo triplets + global PRange -> Global coo values - b = _rhs_callback(a,rows) - t2 = assemble!(b) +function _assemble_coo!(I,J,V,rows::PRange;owners=nothing) + if isa(owners,Nothing) + PArrays.assemble_coo!(I,J,V,partition(rows)) + else + assemble_coo_with_column_owner!(I,J,V,partition(rows),owners) + end +end - # Wait the transfer to finish - if t2 !== nothing - wait(t2) - end - b +function _assemble_coo!(I,J,V,rows::Vector{<:PRange};owners=nothing) + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if isa(owners,Nothing) + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) + else + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[i,j]) + end + end +end + +function assemble_coo_with_column_owner!(I,J,V,row_partition,Jown) + """ + Returns three JaggedArrays with the coo triplets + to be sent to the corresponding owner parts in parts_snd + """ + function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) + global_to_local_row = global_to_local(row_lids) + local_row_to_owner = local_to_owner(row_lids) + owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) + ptrs = zeros(Int32,length(parts_snd)+1) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + ptrs[owner_to_i[owner]+1] +=1 + end + end + PArrays.length_to_ptrs!(ptrs) + gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) + gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) + jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) + v_snd_data = zeros(eltype(k_v),ptrs[end]-1) + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + gj = k_gj[k] + v = k_v[k] + p = ptrs[owner_to_i[owner]] + gi_snd_data[p] = gi + gj_snd_data[p] = gj + jo_snd_data[p] = k_jo[k] + v_snd_data[p] = v + k_v[k] = zero(v) + ptrs[owner_to_i[owner]] += 1 + end + end + PArrays.rewind_ptrs!(ptrs) + gi_snd = JaggedArray(gi_snd_data,ptrs) + gj_snd = JaggedArray(gj_snd_data,ptrs) + jo_snd = JaggedArray(jo_snd_data,ptrs) + v_snd = JaggedArray(v_snd_data,ptrs) + gi_snd, gj_snd, jo_snd, v_snd + end + """ + Pushes to coo_entries_with_column_owner the tuples + gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes + """ + function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + current_n = length(k_gi) + new_n = current_n + length(gi_rcv.data) + resize!(k_gi,new_n) + resize!(k_gj,new_n) + resize!(k_jo,new_n) + resize!(k_v,new_n) + for p in 1:length(gi_rcv.data) + k_gi[current_n+p] = gi_rcv.data[p] + k_gj[current_n+p] = gj_rcv.data[p] + k_jo[current_n+p] = jo_rcv.data[p] + k_v[current_n+p] = v_rcv.data[p] + end + end + part = linear_indices(row_partition) + parts_snd, parts_rcv = assembly_neighbors(row_partition) + coo_entries_with_column_owner = map(tuple,I,J,Jown,V) + gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays + graph = ExchangeGraph(parts_snd,parts_rcv) + t1 = exchange(gi_snd,graph) + t2 = exchange(gj_snd,graph) + t3 = exchange(jo_snd,graph) + t4 = exchange(v_snd,graph) + @async begin + gi_rcv = fetch(t1) + gj_rcv = fetch(t2) + jo_rcv = fetch(t3) + v_rcv = fetch(t4) + map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + I,J,Jown,V + end +end + +# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange +# In the former case, 
gids is a vector of global test dof identifiers, while in the +# latter, a vector of global trial dof identifiers +function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing,kwargs...) + if !ghost + _setup_prange_without_ghosts(dofs_gids_prange) + elseif isa(owners,Nothing) + _setup_prange_with_ghosts(dofs_gids_prange,gids) + else + _setup_prange_with_ghosts(dofs_gids_prange,gids,owners) + end +end + +function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange}, + gids::AbstractMatrix; + ax=:rows,ghost=true,owners=nothing) + @check ax ∈ (:rows,:cols) + block_ids = LinearIndices(dofs_gids_prange) + + gids_ax_slice, _owners = map(block_ids,dofs_gids_prange) do id,prange + gids_ax_slice = (ax == :rows) ? gids[id,:] : gids[:,id] + _owners = nothing + if ghost + gids_ax_slice = map(x -> union(x...), to_parray_of_arrays(gids_ax_slice)) + if !isa(owners,Nothing) # Recompute owners for the union + _owners = get_gid_owners(gids_ax_slice,prange) + end + end + return gids_ax_slice, _owners + end |> tuple_of_arrays + + return map((p,g,o) -> _setup_prange(p,g;ghost=ghost,owners=o),dofs_gids_prange,gids_ax_slice,_owners) +end + +# Create PRange for the rows of the linear system +# without local ghost dofs as per required in the +# FullyAssembledRows() parallel assembly strategy +function _setup_prange_without_ghosts(dofs_gids_prange::PRange) + ngdofs = length(dofs_gids_prange) + indices = map(partition(dofs_gids_prange)) do dofs_indices + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,Int64[],Int32[]) + OwnAndGhostIndices(own_indices,ghost_indices) + end + return PRange(indices) +end + +# Here we are assuming that the sparse communication graph underlying test_dofs_gids_partition +# is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. +# The precondition required for the consistency of any parallel assembly process in GridapDistributed +# is that each processor can determine locally with a single layer of ghost cells the global indices and associated +# processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities +# (i.e., either cells or faces). +function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> dof PRange ghost ids + gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs,dofs_gids_partition,gids_ghost_to_global,gids_ghost_to_owner) +end + +# Here we cannot assume that the sparse communication graph underlying +# trial_dofs_gids_partition is a superset of the one underlying indices. +# Here we chould check whether it is included and call _find_neighbours!() +# if this is the case. At present, we are not taking advantage of this, +# but let the parallel scalable algorithm to compute the reciprocal to do the job. 
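(Aside, before the owner-aware variant below: every `_setup_prange_*` helper ultimately builds, for each part, a local index set made of the owned dofs plus the ghost dofs touched during assembly. A hand-built sketch of one such per-part index set, using the PartitionedArrays v0.3 index types already employed above; the numbers are made up.)

```julia
using PartitionedArrays

ngdofs          = 10                # global number of dofs
owner           = 1                 # id of this part
own_to_global   = [1,2,3,4,5]       # dofs owned by this part
ghost_to_global = [6,8]             # ghost dofs touched during assembly
ghost_to_owner  = Int32[2,2]        # parts owning those ghosts

own       = OwnIndices(ngdofs,owner,own_to_global)
ghost     = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner)
local_ids = OwnAndGhostIndices(own,ghost)
# One such object per part, collected over the partition, is what gets
# wrapped into the final PRange.
```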
+function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids,owners) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + gids,owners,dofs_gids_partition) do gids, owners, indices + ghost_touched = Dict{Int,Bool}() + ghost_to_global = Int64[] + ghost_to_owner = Int64[] + me = part_id(indices) + for (j,jo) in zip(gids,owners) + if jo != me + if !haskey(ghost_touched,j) + push!(ghost_to_global,j) + push!(ghost_to_owner,jo) + ghost_touched[j] = true + end + end + end + ghost_to_global, ghost_to_owner + end |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=false) +end + +function _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=true) + indices = map(dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) + OwnAndGhostIndices(own_indices,ghost_indices) + end + if discover_neighbours + _find_neighbours!(indices,dofs_gids_partition) + end + return PRange(indices) end diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl new file mode 100644 index 00000000..5661432f --- /dev/null +++ b/src/BlockPartitionedArrays.jl @@ -0,0 +1,365 @@ + +""" + struct BlockPRange{A} <: AbstractUnitRange{Int} +""" +struct BlockPRange{A} <: AbstractUnitRange{Int} + ranges::Vector{PRange{A}} + function BlockPRange(ranges::Vector{<:PRange{A}}) where A + new{A}(ranges) + end +end + +Base.first(a::BlockPRange) = 1 +Base.last(a::BlockPRange) = sum(map(last,a.ranges)) + +BlockArrays.blocklength(a::BlockPRange) = length(a.ranges) +BlockArrays.blocksize(a::BlockPRange) = (blocklength(a),) +BlockArrays.blockaxes(a::BlockPRange) = (Block.(Base.OneTo(blocklength(a))),) +BlockArrays.blocks(a::BlockPRange) = a.ranges + +function PartitionedArrays.partition(a::BlockPRange) + return map(partition,blocks(a)) |> to_parray_of_arrays +end + +function Base.getindex(a::BlockPRange,inds::Block{1}) + a.ranges[inds.n...] 
+end + +""" + struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} +""" +struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} + blocks::Array{A,N} + axes::NTuple{N,B} + + function BlockPArray(blocks::Array{<:AbstractArray{T,N},N}, + axes ::NTuple{N,<:BlockPRange}) where {T,N} + @check all(map(d->size(blocks,d)==blocklength(axes[d]),1:N)) + local_type(::Type{<:PVector{V}}) where V = V + local_type(::Type{<:PSparseMatrix{V}}) where V = V + A = eltype(blocks) + B = typeof(first(axes)) + V = local_type(A) + new{V,T,N,A,B}(blocks,axes) + end +end + +const BlockPVector{V,T,A,B} = BlockPArray{V,T,1,A,B} +const BlockPMatrix{V,T,A,B} = BlockPArray{V,T,2,A,B} + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::BlockPRange) + BlockPArray(blocks,(rows,)) +end + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::Vector{<:PRange}) + BlockPVector(blocks,BlockPRange(rows)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::BlockPRange,cols::BlockPRange) + BlockPArray(blocks,(rows,cols)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::Vector{<:PRange},cols::Vector{<:PRange}) + BlockPMatrix(blocks,BlockPRange(rows),BlockPRange(cols)) +end + +function BlockPVector{V}(::UndefInitializer,rows::BlockPRange) where {V} + vals = map(blocks(rows)) do r + PVector{V}(undef,partition(r)) + end + return BlockPVector(vals,rows) +end + +function BlockPMatrix{V}(::UndefInitializer,rows::BlockPRange,cols::BlockPRange) where {V} + block_ids = CartesianIndices((blocklength(rows),blocklength(cols))) + block_rows = blocks(rows) + block_cols = blocks(cols) + vals = map(block_ids) do I + r = block_rows[I[1]] + c = block_cols[I[2]] + PSparseMatrix{V}(undef,partition(r),partition(c)) + end + return BlockPMatrix(vals,rows) +end + +# AbstractArray API + +Base.axes(a::BlockPArray) = a.axes +Base.size(a::BlockPArray) = Tuple(map(length,a.axes)) + +Base.IndexStyle(::Type{<:BlockPVector}) = IndexLinear() +Base.IndexStyle(::Type{<:BlockPMatrix}) = IndexCartesian() + +function Base.similar(a::BlockPVector,::Type{T},inds::Tuple{<:BlockPRange}) where T + vals = map(blocks(a),blocks(inds[1])) do ai,i + similar(ai,T,i) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPVector{V,T,A}},inds::Tuple{<:BlockPRange}) where {V,T,A} + rows = blocks(inds[1]) + values = map(rows) do r + return similar(A,(r,)) + end + return BlockPArray(values,inds) +end + +function Base.similar(a::BlockPMatrix,::Type{T},inds::Tuple{<:BlockPRange,<:BlockPRange}) where T + vals = map(CartesianIndices(blocksize(a))) do I + rows = inds[1].ranges[I[1]] + cols = inds[2].ranges[I[2]] + similar(a.blocks[I],T,(rows,cols)) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPMatrix{V,T,A}},inds::Tuple{<:BlockPRange,<:BlockPRange}) where {V,T,A} + rows = blocks(inds[1]) + cols = blocks(inds[2]) + values = map(CartesianIndices((length(rows),length(cols)))) do I + i,j = I[1],I[2] + return similar(A,(rows[i],cols[j])) + end + return BlockPArray(values,inds) +end + +function Base.getindex(a::BlockPArray{T,N},inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end +function Base.setindex(a::BlockPArray{T,N},v,inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end + +function Base.show(io::IO,k::MIME"text/plain",data::BlockPArray{T,N}) where {T,N} + v = first(blocks(data)) + s = prod(map(si->"$(si)x",blocksize(data)))[1:end-1] + map_main(partition(v)) do values + 
println(io,"$s-block BlockPArray{$T,$N}") + end +end + +function Base.zero(v::BlockPArray) + return mortar(map(zero,blocks(v))) +end + +function Base.copyto!(y::BlockPVector,x::BlockPVector) + @check blocklength(x) == blocklength(y) + for i in blockaxes(x,1) + copyto!(y[i],x[i]) + end + return y +end + +function Base.copyto!(y::BlockPMatrix,x::BlockPMatrix) + @check blocksize(x) == blocksize(y) + for i in blockaxes(x,1) + for j in blockaxes(x,2) + copyto!(y[i,j],x[i,j]) + end + end + return y +end + +function Base.fill!(a::BlockPVector,v) + map(blocks(a)) do a + fill!(a,v) + end + return a +end + +function Base.sum(a::BlockPArray) + return sum(map(sum,blocks(a))) +end + +Base.maximum(x::BlockPArray) = maximum(identity,x) +function Base.maximum(f::Function,x::BlockPArray) + maximum(map(xi->maximum(f,xi),blocks(x))) +end + +Base.minimum(x::BlockPArray) = minimum(identity,x) +function Base.minimum(f::Function,x::BlockPArray) + minimum(map(xi->minimum(f,xi),blocks(x))) +end + +function Base.:(==)(a::BlockPVector,b::BlockPVector) + A = length(a) == length(b) + B = all(map((ai,bi)->ai==bi,blocks(a),blocks(b))) + return A && B +end + +function Base.any(f::Function,x::BlockPVector) + any(map(xi->any(f,xi),blocks(x))) +end + +function Base.all(f::Function,x::BlockPVector) + all(map(xi->all(f,xi),blocks(x))) +end + +function LinearAlgebra.rmul!(a::BlockPVector,v::Number) + map(ai->rmul!(ai,v),blocks(a)) + return a +end + +# AbstractBlockArray API + +BlockArrays.blocks(a::BlockPArray) = a.blocks + +function Base.getindex(a::BlockPArray,inds::Block{1}) + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{V,T,N},inds::Block{N}) where {V,T,N} + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{V,T,N},inds::Vararg{Block{1},N}) where {V,T,N} + a.blocks[map(i->i.n[1],inds)...] 
+end + +function BlockArrays.mortar(blocks::Vector{<:PVector}) + rows = map(b->axes(b,1),blocks) + BlockPVector(blocks,rows) +end + +function BlockArrays.mortar(blocks::Matrix{<:PSparseMatrix}) + rows = map(b->axes(b,1),blocks[:,1]) + cols = map(b->axes(b,2),blocks[1,:]) + + function check_axes(a,r,c) + A = PartitionedArrays.matching_local_indices(axes(a,1),r) + B = PartitionedArrays.matching_local_indices(axes(a,2),c) + return A & B + end + @check all(map(I -> check_axes(blocks[I],rows[I[1]],cols[I[2]]),CartesianIndices(size(blocks)))) + + return BlockPMatrix(blocks,rows,cols) +end + +# PartitionedArrays API + +Base.wait(t::Array) = map(wait,t) +Base.fetch(t::Array) = map(fetch,t) + +function PartitionedArrays.assemble!(a::BlockPArray) + map(assemble!,blocks(a)) +end + +function PartitionedArrays.consistent!(a::BlockPArray) + map(consistent!,blocks(a)) +end + +function PartitionedArrays.partition(a::BlockPArray) + vals = map(partition,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.to_trivial_partition(a::BlockPArray) + vals = map(PartitionedArrays.to_trivial_partition,blocks(a)) + return mortar(vals) +end + +function PartitionedArrays.local_values(a::BlockPArray) + vals = map(local_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_values(a::BlockPArray) + vals = map(own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_values(a::BlockPArray) + vals = map(ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_ghost_values(a::BlockPMatrix) + vals = map(own_ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_own_values(a::BlockPMatrix) + vals = map(ghost_own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +# LinearAlgebra API + +function LinearAlgebra.mul!(y::BlockPVector,A::BlockPMatrix,x::BlockPVector) + z = zero(eltype(y)) + o = one(eltype(A)) + for i in blockaxes(A,2) + fill!(y[i],z) + for j in blockaxes(A,2) + mul!(y[i],A[i,j],x[j],o,o) + end + end +end + +function LinearAlgebra.dot(x::BlockPVector,y::BlockPVector) + return sum(map(dot,blocks(x),blocks(y))) +end + +function LinearAlgebra.norm(v::BlockPVector,p::Real=2) + block_norms = map(vi->norm(vi,p),blocks(v)) + return sum(block_norms.^p)^(1/p) +end + +function LinearAlgebra.fillstored!(a::BlockPMatrix,v) + map(blocks(a)) do a + LinearAlgebra.fillstored!(a,v) + end + return a +end + +# Broadcasting + +struct BlockPBroadcasted{A,B} + blocks :: A + axes :: B +end + +BlockArrays.blocks(b::BlockPBroadcasted) = b.blocks +BlockArrays.blockaxes(b::BlockPBroadcasted) = b.axes + +function Base.broadcasted(f, args::Union{BlockPArray,BlockPBroadcasted}...) + a1 = first(args) + @boundscheck @assert all(ai -> blockaxes(ai) == blockaxes(a1),args) + + blocks_in = map(blocks,args) + blocks_out = map((largs...)->Base.broadcasted(f,largs...),blocks_in...) 
+ + return BlockPBroadcasted(blocks_out,blockaxes(a1)) +end + +function Base.broadcasted(f, a::Number, b::Union{BlockPArray,BlockPBroadcasted}) + blocks_out = map(b->Base.broadcasted(f,a,b),blocks(b)) + return BlockPBroadcasted(blocks_out,blockaxes(b)) +end + +function Base.broadcasted(f, a::Union{BlockPArray,BlockPBroadcasted}, b::Number) + blocks_out = map(a->Base.broadcasted(f,a,b),blocks(a)) + return BlockPBroadcasted(blocks_out,blockaxes(a)) +end + +function Base.broadcasted(f, + a::Union{BlockPArray,BlockPBroadcasted}, + b::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}) + Base.broadcasted(f,a,Base.materialize(b)) +end + +function Base.broadcasted( + f, + a::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}, + b::Union{BlockPArray,BlockPBroadcasted}) + Base.broadcasted(f,Base.materialize(a),b) +end + +function Base.materialize(b::BlockPBroadcasted) + blocks_out = map(Base.materialize,blocks(b)) + return mortar(blocks_out) +end + +function Base.materialize!(a::BlockPArray,b::BlockPBroadcasted) + map(Base.materialize!,blocks(a),blocks(b)) + return a +end diff --git a/src/DivConformingFESpaces.jl b/src/DivConformingFESpaces.jl index bfd5f3dd..ac31e505 100644 --- a/src/DivConformingFESpaces.jl +++ b/src/DivConformingFESpaces.jl @@ -1,5 +1,4 @@ -""" -""" + function FESpaces.FESpace(model::DistributedDiscreteModel, reffe::Tuple{RaviartThomas,Any,Any}; conformity=nothing,kwargs...) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 3953bd90..21aedf42 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -119,11 +119,11 @@ end function fetch_vector_ghost_values_cache(vector_partition,partition) cache = PArrays.p_vector_cache(vector_partition,partition) map(reverse,cache) -end +end function fetch_vector_ghost_values!(vector_partition,cache) assemble!((a,b)->b, vector_partition, cache) -end +end function generate_gids( cell_range::PRange, @@ -173,7 +173,6 @@ function generate_gids( cell_to_ldofs, cell_range) - # Find the global range of owned dofs first_gdof = scan(+,nodofs,type=:exclusive,init=one(eltype(nodofs))) @@ -325,18 +324,18 @@ function FESpaces.EvaluationFunction( end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector,isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals) + f::DistributedSingleFieldFESpace,x::AbstractVector,isconsistent=false) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values)) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector, + f::DistributedSingleFieldFESpace,x::AbstractVector, dirichlet_values::AbstractArray{<:AbstractVector},isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals,dirichlet_values) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values),dirichlet_values) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end @@ -463,16 +462,16 @@ end # Factories -function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;kwargs...) +function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;split_own_and_ghost=false,kwargs...) 
spaces = map(local_views(model)) do m FESpace(m,reffe;kwargs...) end gids = generate_gids(model,spaces) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) +function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;split_own_and_ghost=false,kwargs...) trian = add_ghost_cells(_trian) trian_gids = generate_cell_gids(trian) spaces = map(trian.trians) do t @@ -481,23 +480,23 @@ function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) cell_to_ldofs = map(get_cell_dof_ids,spaces) nldofs = map(num_free_dofs,spaces) gids = generate_gids(trian_gids,cell_to_ldofs,nldofs) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function _find_vector_type(spaces,gids) - # TODO Now the user can select the local vector type but not the global one - # new kw-arg global_vector_type ? - # we use PVector for the moment - local_vector_type=typeof(Int) - map(spaces) do space - local_vector_type=get_vector_type(space) +function _find_vector_type(spaces,gids;split_own_and_ghost=false) + local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) + Tv = eltype(local_vector_type) + T = Vector{Tv} + if split_own_and_ghost + T = OwnAndGhostVectors{T} end - vector_entries=map(i->local_vector_type(undef,length(local_to_owner(i))),partition(gids)) - - # Here we are determining the full type of a PVector by creating one auxiliary vector - # Can this be done more efficiently? - vector_type = typeof(PVector(vector_entries,partition(gids))) + if isa(gids,PRange) + vector_type = typeof(PVector{T}(undef,partition(gids))) + else # isa(gids,BlockPRange) + vector_type = typeof(BlockPVector{T}(undef,gids)) + end + return vector_type end # Assembly @@ -506,17 +505,12 @@ function FESpaces.collect_cell_matrix( trial::DistributedFESpace, test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_matrix, - local_views(trial), - local_views(test), - local_views(a)) + map(collect_cell_matrix,local_views(trial),local_views(test),local_views(a)) end function FESpaces.collect_cell_vector( test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_vector,local_views(test),local_views(a)) + map(collect_cell_vector,local_views(test),local_views(a)) end function FESpaces.collect_cell_matrix_and_vector( @@ -557,8 +551,7 @@ function FESpaces.collect_cell_matrix_and_vector( test::DistributedFESpace, mat::DistributedDomainContribution, l::Number) - map( - local_views(trial),local_views(test),local_views(mat)) do u,v,m + map(local_views(trial),local_views(test),local_views(mat)) do u,v,m collect_cell_matrix_and_vector(u,v,m,l) end end @@ -569,8 +562,7 @@ function FESpaces.collect_cell_matrix_and_vector( mat::DistributedDomainContribution, l::Number, uhd) - map( - local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f + map(local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f collect_cell_matrix_and_vector(u,v,m,l,f) end end @@ -594,27 +586,37 @@ FESpaces.get_vector_builder(a::DistributedSparseMatrixAssembler) = a.vector_buil FESpaces.get_assembly_strategy(a::DistributedSparseMatrixAssembler) = a.strategy function 
FESpaces.symbolic_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(symbolic_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.numeric_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(numeric_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.symbolic_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(symbolic_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.numeric_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(numeric_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(symbolic_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(numeric_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end # Parallel Assembly strategies @@ -625,16 +627,37 @@ end # When using this one, make sure that you also loop over ghost cells. # This is at your own risk. 
-function local_assembly_strategy(::FullyAssembledRows,test_space_indices,trial_space_indices) - test_space_local_to_ghost = local_to_ghost(test_space_indices) +function local_assembly_strategy(::FullyAssembledRows,rows,cols) + rows_local_to_ghost = local_to_ghost(rows) GenericAssemblyStrategy( identity, identity, - row->test_space_local_to_ghost[row]==0, + row->rows_local_to_ghost[row]==0, col->true) end # Assembler high level constructors +function FESpaces.SparseMatrixAssembler( + local_mat_type, + local_vec_type, + rows::PRange, + cols::PRange, + par_strategy=SubAssembledRows()) + + assems = map(partition(rows),partition(cols)) do rows,cols + local_strategy = local_assembly_strategy(par_strategy,rows,cols) + FESpaces.GenericSparseMatrixAssembler(SparseMatrixBuilder(local_mat_type), + ArrayBuilder(local_vec_type), + Base.OneTo(length(rows)), + Base.OneTo(length(cols)), + local_strategy) + end + + mat_builder = PSparseMatrixBuilderCOO(local_mat_type,par_strategy) + vec_builder = PVectorBuilder(local_vec_type,par_strategy) + return DistributedSparseMatrixAssembler(par_strategy,assems,mat_builder,vec_builder,rows,cols) +end + function FESpaces.SparseMatrixAssembler( local_mat_type, local_vec_type, @@ -642,25 +665,9 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = local_vec_type - T = eltype(Tv) - Tm = local_mat_type - trial_dofs_gids_partition = partition(trial.gids) - test_dofs_gids_partition = partition(test.gids) - assems = map(local_views(test),local_views(trial),test_dofs_gids_partition,trial_dofs_gids_partition) do v,u,trial_gids_partition,test_gids_partition - local_strategy = local_assembly_strategy(par_strategy,trial_gids_partition,test_gids_partition) - SparseMatrixAssembler(Tm,Tv,u,v,local_strategy) - end - matrix_builder = PSparseMatrixBuilderCOO(Tm,par_strategy) - vector_builder = PVectorBuilder(Tv,par_strategy) - test_dofs_gids_prange = get_free_dof_ids(test) - trial_dofs_gids_prange = get_free_dof_ids(trial) - DistributedSparseMatrixAssembler(par_strategy, - assems, - matrix_builder, - vector_builder, - test_dofs_gids_prange, - trial_dofs_gids_prange) + rows = get_free_dof_ids(test) + cols = get_free_dof_ids(trial) + SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) end function FESpaces.SparseMatrixAssembler( @@ -668,11 +675,8 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = typeof(Int) - map(local_views(trial)) do trial - Tv = get_vector_type(trial) - end - T = eltype(Tv) + Tv = PartitionedArrays.getany(map(get_vector_type,local_views(trial))) + T = eltype(Tv) Tm = SparseMatrixCSC{T,Int} SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy) end diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index eca02ab6..9e985bc8 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -22,9 +22,11 @@ const PArrays = PartitionedArrays using SparseArrays using WriteVTK using FillArrays +using BlockArrays +using LinearAlgebra import Gridap.TensorValues: inner, outer, double_contraction, symmetric_part -import LinearAlgebra: det, tr, cross, dot, ⋅ +import LinearAlgebra: det, tr, cross, dot, ⋅, diag import Base: inv, abs, abs2, *, +, -, /, adjoint, transpose, real, imag, conj, getproperty, propertynames import Gridap.Fields: grad2curl import Gridap.ODEs.ODETools: ∂t, ∂tt @@ -38,6 +40,8 @@ export get_face_gids export local_views, get_parts export with_ghost, no_ghost +include("BlockPartitionedArrays.jl") + 
include("Algebra.jl") include("Geometry.jl") diff --git a/src/MultiField.jl b/src/MultiField.jl index 68ccb499..fa1f16e9 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -29,20 +29,21 @@ local_views(a::Vector{<:DistributedCellField}) = [ai.fields for ai in a] """ """ -struct DistributedMultiFieldFESpace{A,B,C,D} <: DistributedFESpace +struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace + multi_field_style::MS field_fe_space::A part_fe_space::B gids::C vector_type::Type{D} function DistributedMultiFieldFESpace( field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace}, - part_fe_space::AbstractArray{<:MultiFieldFESpace}, - gids::PRange, - vector_type::Type{D}) where D + part_fe_space::AbstractArray{<:MultiFieldFESpace{MS}}, + gids::Union{<:PRange,<:BlockPRange}, + vector_type::Type{D}) where {D,MS} A = typeof(field_fe_space) B = typeof(part_fe_space) C = typeof(gids) - new{A,B,C,D}(field_fe_space,part_fe_space,gids,vector_type) + new{MS,A,B,C,D}(MS(),field_fe_space,part_fe_space,gids,vector_type) end end @@ -62,7 +63,7 @@ function FESpaces.get_free_dof_ids(fs::DistributedMultiFieldFESpace) end function MultiField.restrict_to_field( - f::DistributedMultiFieldFESpace,free_values::PVector,field::Integer) + f::DistributedMultiFieldFESpace,free_values::AbstractVector,field::Integer) values = map(f.part_fe_space,partition(free_values)) do u,x restrict_to_field(u,x,field) end @@ -72,10 +73,8 @@ end function FESpaces.FEFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - part_fe_fun = map(FEFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(FEFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -88,10 +87,8 @@ end function FESpaces.EvaluationFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,false) - part_fe_fun = map(EvaluationFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(EvaluationFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -108,8 +105,7 @@ function FESpaces.interpolate(objects,fe::DistributedMultiFieldFESpace) end function FESpaces.interpolate!(objects,free_values::AbstractVector,fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -124,8 +120,7 @@ end function FESpaces.interpolate_everywhere(objects,fe::DistributedMultiFieldFESpace) free_values = zero_free_values(fe) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end 
field_fe_fun = DistributedSingleFieldFEFunction[] @@ -143,8 +138,10 @@ function FESpaces.interpolate_everywhere!( objects,free_values::AbstractVector, dirichlet_values::Vector{AbstractArray{<:AbstractVector}}, fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + msg = "free_values and fe have incompatible index partitions." + @check partition(axes(free_values,1)) === partition(fe.gids) msg + + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -160,7 +157,7 @@ end function FESpaces.interpolate_everywhere( objects::Vector{<:DistributedCellField},fe::DistributedMultiFieldFESpace) - local_objects = local_views(objects) + local_objects = map(local_views,objects) local_spaces = local_views(fe) part_fe_fun = map(local_spaces,local_objects...) do f,o... interpolate_everywhere(o,f) @@ -224,13 +221,13 @@ function FESpaces.TrialFESpace(objects,a::DistributedMultiFieldFESpace) TrialFESpace(a,objects) end -function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace,objects) +function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace{MS},objects) where MS f_dspace_test = a.field_fe_space f_dspace = map( arg -> TrialFESpace(arg[1],arg[2]), zip(f_dspace_test,objects) ) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) + p_mspace = map(s->MultiFieldFESpace(s;style=MS()),p_f_space) gids = a.gids vector_type = a.vector_type DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) @@ -239,17 +236,20 @@ end # Factory function MultiField.MultiFieldFESpace( - f_dspace::Vector{<:DistributedSingleFieldFESpace}) + f_dspace::Vector{<:DistributedSingleFieldFESpace};split_own_and_ghost=false, kwargs...) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) - p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) - gids = generate_multi_field_gids(f_dspace,p_mspace) - vector_type = _find_vector_type(p_mspace,gids) + + p_f_space = map(v,f_p_space...) + p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) + style = PartitionedArrays.getany(map(MultiFieldStyle,p_mspace)) + gids = generate_multi_field_gids(style,f_dspace,p_mspace) + vector_type = _find_vector_type(p_mspace,gids;split_own_and_ghost=split_own_and_ghost) DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) end function generate_multi_field_gids( + ::MultiFieldStyle, f_dspace::Vector{<:DistributedSingleFieldFESpace}, p_mspace::AbstractArray{<:MultiFieldFESpace}) @@ -268,6 +268,19 @@ function generate_multi_field_gids( gids = generate_multi_field_gids(f_p_flid_lid,f_frange) end +function generate_multi_field_gids( + ::BlockMultiFieldStyle{NB,SB,P}, + f_dspace::Vector{<:DistributedSingleFieldFESpace}, + p_mspace::AbstractArray{<:MultiFieldFESpace}) where {NB,SB,P} + + block_ranges = MultiField.get_block_ranges(NB,SB,P) + block_gids = map(block_ranges) do range + space = (length(range) == 1) ? 
f_dspace[range[1]] : MultiFieldFESpace(f_dspace[range]) + get_free_dof_ids(space) + end + return BlockPRange(block_gids) +end + function generate_multi_field_gids( f_p_flid_lid::AbstractVector{<:AbstractArray{<:AbstractVector}}, f_frange::AbstractVector{<:PRange}) @@ -381,3 +394,92 @@ function propagate_to_ghost_multifield!( end end end + +# BlockSparseMatrixAssemblers + +const DistributedBlockSparseMatrixAssembler{NB,NV,SB,P} = + MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P,<:DistributedSparseMatrixAssembler} + +function FESpaces.SparseMatrixAssembler( + local_mat_type, + local_vec_type, + trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, + test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, + par_strategy=SubAssembledRows()) where {NB,SB,P} + + block_idx = CartesianIndices((NB,NB)) + block_rows = blocks(test.gids) + block_cols = blocks(trial.gids) + block_assemblers = map(block_idx) do idx + rows = block_rows[idx[1]]; cols = block_cols[idx[2]] + return SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) + end + + NV = length(P) + return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers) +end + +function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} + assems = a.block_assemblers + array = to_parray_of_arrays(map(local_views,assems)) + return map(MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P},array) +end + +function local_views(a::MatrixBlock,rows,cols) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I[1]],cols[I[2]]) + end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) +end + +function local_views(a::VectorBlock,rows) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I]) + end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) +end + +function local_views(a::ArrayBlockView,axes...) + array = local_views(a.array,axes...) 
+ map(array) do array + ArrayBlockView(array,a.block_map) + end +end + +# SparseMatrixAssembler API + +function FESpaces.symbolic_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) +end + +function FESpaces.numeric_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) +end + +function FESpaces.symbolic_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) + rows = get_rows(a) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) +end + +function FESpaces.numeric_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) + rows = get_rows(a) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) +end + +function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) +end + +function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) +end diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl new file mode 100644 index 00000000..3e810085 --- /dev/null +++ b/test/BlockPartitionedArraysTests.jl @@ -0,0 +1,112 @@ + +using Test +using Gridap +using PartitionedArrays +using GridapDistributed +using BlockArrays +using SparseArrays +using LinearAlgebra + +using GridapDistributed: BlockPArray, BlockPVector, BlockPMatrix, BlockPRange + +ranks = with_debug() do distribute + distribute(LinearIndices((2,))) +end + +indices = map(ranks) do r + if r == 1 + own_gids = [1,2,3,4,5] + ghost_gids = [6,7] + ghost_owners = [2,2] + else + own_gids = [6,7,8,9,10] + ghost_gids = [5] + ghost_owners = [1] + end + own_idx = OwnIndices(10,r,own_gids) + ghost_idx = GhostIndices(10,ghost_gids,ghost_owners) + OwnAndGhostIndices(own_idx,ghost_idx) +end + +block_range = BlockPRange([PRange(indices),PRange(indices)]) + +_v = PVector{OwnAndGhostVectors{Vector{Float64}}}(undef,indices) +v = BlockPArray([_v,_v],(block_range,)) +fill!(v,1.0) + +_m = map(CartesianIndices((2,2))) do I + i,j = I[1],I[2] + local_mats = map(ranks,indices) do r, ind + n = local_length(ind) + if i==j && r == 1 + SparseMatrixCSC(n,n,Int[1,3,5,7,9,10,11,13],Int[1,2,2,3,3,4,4,5,5,6,6,7],fill(1.0,12)) + elseif i==j && r == 2 + SparseMatrixCSC(n,n,Int[1,2,4,6,8,10,11],Int[1,1,2,2,3,3,4,4,5,6],fill(1.0,10)) + else + SparseMatrixCSC(n,n,fill(Int(1),n+1),Int[],Float64.([])) + end + end + PSparseMatrix(local_mats,indices,indices) +end; +m = BlockPArray(_m,(block_range,block_range)) + +x = similar(_v) +mul!(x,_m[1,1],_v) + +# BlockPRange + +@test blocklength(block_range) == 2 +@test blocksize(block_range) == (2,) + +# AbstractArray API + +__v = similar(v,block_range) +__m = similar(m,(block_range,block_range)) +fill!(__v,1.0) + +__v = __v .+ 1.0 +__v = __v .- 1.0 +__v = __v .* 1.0 +__v = __v ./ 1.0 + +# PartitionedArrays API + +consistent!(__v) |> wait +t = assemble!(__v) +assemble!(__m) |> wait +fetch(t); + +PartitionedArrays.to_trivial_partition(m) + +partition(v) +partition(m) +local_values(v) +own_values(v) +ghost_values(v) +own_ghost_values(m) 
+ghost_own_values(m) + +# LinearAlgebra API +fill!(v,1.0) +x = similar(v) +mul!(x,m,v) +consistent!(x) |> wait + +@test dot(v,x) ≈ 36 +norm(v) +copy!(x,v) + +LinearAlgebra.fillstored!(__m,1.0) + +__v = BlockPVector{Vector{Float64}}(undef,block_range) +#__m = BlockPMatrix{SparseMatrixCSC{Float64,Int64}}(undef,block_range,block_range) + +maximum(abs,v) +minimum(abs,v) + +# GridapDistributed API +v_parts = local_views(v) +m_parts = local_views(m) + +v_parts = local_views(v,block_range) +m_parts = local_views(m,block_range,block_range) diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl new file mode 100644 index 00000000..9b92f185 --- /dev/null +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -0,0 +1,117 @@ +module BlockSparseMatrixAssemblersTests + +using Test, LinearAlgebra, BlockArrays, SparseArrays + +using Gridap +using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField + +using GridapDistributed +using PartitionedArrays +using GridapDistributed: BlockPVector, BlockPMatrix + +function is_same_vector(x::BlockPVector,y::PVector,Ub,U) + y_fespace = GridapDistributed.change_ghost(y,U.gids) + x_fespace = GridapDistributed.change_ghost(x,Ub.gids) + + res = map(1:num_fields(Ub)) do i + xi = restrict_to_field(Ub,x_fespace,i) + yi = restrict_to_field(U,y_fespace,i) + xi ≈ yi + end + return all(res) +end + +function is_same_matrix(Ab::BlockPMatrix,A::PSparseMatrix,Xb,X) + yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); + xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); + mul!(yb,Ab,xb) + + y = pfill(0.0,partition(axes(A,1))) + x = pfill(1.0,partition(axes(A,2))) + mul!(y,A,x) + + return is_same_vector(yb,y,Xb,X) +end + +function _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + biform, liform = weakform + + # Normal assembly + Y = MultiFieldFESpace(fill(V,n_spaces)) + X = MultiFieldFESpace(fill(U,n_spaces)) + + u = get_trial_fe_basis(X) + v = get_fe_basis(Y) + + data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v)) + matdata = collect_cell_matrix(X,Y,biform(u,v)) + vecdata = collect_cell_vector(Y,liform(v)) + + assem = SparseMatrixAssembler(X,Y,FullyAssembledRows()) + A1 = assemble_matrix(assem,matdata) + b1 = assemble_vector(assem,vecdata) + A2,b2 = assemble_matrix_and_vector(assem,data); + + # Block Assembly + Yb = MultiFieldFESpace(fill(V,n_spaces);style=mfs) + Xb = MultiFieldFESpace(fill(U,n_spaces);style=mfs) + + ub = get_trial_fe_basis(Xb) + vb = get_fe_basis(Yb) + + bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb)) + bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb)) + bvecdata = collect_cell_vector(Yb,liform(vb)) + + assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) + A1_blocks = assemble_matrix(assem_blocks,bmatdata); + b1_blocks = assemble_vector(assem_blocks,bvecdata); + @test is_same_vector(b1_blocks,b1,Yb,Y) + @test is_same_matrix(A1_blocks,A1,Xb,X) + + assemble_matrix!(A1_blocks,assem_blocks,bmatdata); + assemble_vector!(b1_blocks,assem_blocks,bvecdata); + @test is_same_vector(b1_blocks,b1,Yb,Y) + @test is_same_matrix(A1_blocks,A1,Xb,X) + + A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata) + @test is_same_vector(b2_blocks,b2,Yb,Y) + @test is_same_matrix(A2_blocks,A2,Xb,X) + + assemble_matrix_and_vector!(A2_blocks,b2_blocks,assem_blocks,bdata) + @test is_same_vector(b2_blocks,b2,Yb,Y) + @test is_same_matrix(A2_blocks,A2,Xb,X) + + op = AffineFEOperator(biform,liform,X,Y) + block_op = AffineFEOperator(biform,liform,Xb,Yb) + 
@test is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) + @test is_same_matrix(get_matrix(block_op),get_matrix(op),Xb,X) +end + +############################################################################################ + +function main(distribute,parts) + ranks = distribute(LinearIndices((prod(parts),))) + + model = CartesianDiscreteModel(ranks,parts,(0,1,0,1),(8,8)) + Ω = Triangulation(model) + + sol(x) = sum(x) + reffe = LagrangianRefFE(Float64,QUAD,1) + V = FESpace(Ω, reffe; dirichlet_tags="boundary") + U = TrialFESpace(sol,V) + + dΩ = Measure(Ω, 2) + biform2((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ + liform2((v1,v2)) = ∫(v1 + v2)*dΩ + biform3((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 + u3⋅v2 + u2⋅v3)*dΩ + liform3((v1,v2,v3)) = ∫(v1 + v2 + v3)*dΩ + + for (n_spaces,weakform) in zip([2,3],[(biform2,liform2),(biform3,liform3)]) + for mfs in [BlockMultiFieldStyle(),BlockMultiFieldStyle(2,(1,n_spaces-1))] + _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + end + end +end + +end \ No newline at end of file diff --git a/test/FESpacesTests.jl b/test/FESpacesTests.jl index 7b0860ab..bf1e78e6 100644 --- a/test/FESpacesTests.jl +++ b/test/FESpacesTests.jl @@ -91,11 +91,11 @@ function main(distribute,parts,das) V = TestFESpace(model,reffe,dirichlet_tags="boundary") U = TrialFESpace(u,V) V2 = FESpace(Ω,reffe) - @test get_vector_type(V) <: PVector - @test get_vector_type(U) <: PVector - @test get_vector_type(V2) <: PVector + @test get_vector_type(V) <: PVector{<:Vector} + @test get_vector_type(U) <: PVector{<:Vector} + @test get_vector_type(V2) <: PVector{<:Vector} - free_values_partition=map(partition(V.gids)) do indices + free_values_partition = map(partition(V.gids)) do indices ones(Float64,local_length(indices)) end @@ -163,6 +163,23 @@ function main(distribute,parts,das) cont = ∫( abs2(u0h) )dΩ @test sqrt(sum(cont)) < 1.0e-14 + # OwnAndGhostVector partitions + V3 = FESpace(model,reffe,dirichlet_tags="boundary",split_own_and_ghost=true) + U3 = TrialFESpace(u,V3) + @test get_vector_type(V3) <: PVector{<:OwnAndGhostVectors} + + free_values = zero_free_values(U3) + dirichlet_values = get_dirichlet_dof_values(U3) + uh = interpolate_everywhere(u,U3) + _uh = interpolate_everywhere(uh,U3) + __uh = interpolate_everywhere!(_uh,free_values,dirichlet_values,U3) + + uh = interpolate(u,U3) + dofs = get_fe_dof_basis(U3) + cell_vals = dofs(uh) + gather_free_values!(free_values,U3,cell_vals) + gather_free_and_dirichlet_values!(free_values,dirichlet_values,U3,cell_vals) + uh = FEFunction(U3,free_values,dirichlet_values) # I need to use the square [0,2]² in the sequel so that # when integrating over the interior facets, the entries diff --git a/test/MultiFieldTests.jl b/test/MultiFieldTests.jl index b5a4f534..cb53e322 100644 --- a/test/MultiFieldTests.jl +++ b/test/MultiFieldTests.jl @@ -2,11 +2,17 @@ module MultiFieldTests using Gridap using Gridap.FESpaces +using Gridap.MultiField using GridapDistributed using PartitionedArrays using Test -function main(distribute, parts) +function l2_error(u1,u2,dΩ) + eu = u1 - u2 + sqrt(sum(∫( eu⋅eu )dΩ)) +end + +function main(distribute, parts, mfs) ranks = distribute(LinearIndices((prod(parts),))) output = mkpath(joinpath(@__DIR__,"output")) @@ -16,6 +22,7 @@ function main(distribute, parts) Ω = Triangulation(model) k = 2 + dΩ = Measure(Ω,2*k) reffe_u = ReferenceFE(lagrangian,VectorValue{2,Float64},k) reffe_p = ReferenceFE(lagrangian,Float64,k-1,space=:P) @@ -29,34 +36,43 @@ function main(distribute, parts) U = TrialFESpace(V,u) P = TrialFESpace(Q,p) 
- VxQ = MultiFieldFESpace([V,Q]) - UxP = MultiFieldFESpace([U,P]) # This generates again the global numbering + VxQ = MultiFieldFESpace([V,Q];style=mfs) + UxP = MultiFieldFESpace([U,P];style=mfs) # This generates again the global numbering UxP = TrialFESpace([u,p],VxQ) # This reuses the one computed @test length(UxP) == 2 uh, ph = interpolate([u,p],UxP) - eu = u - uh - ep = p - ph - - dΩ = Measure(Ω,2*k) - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 a((u,p),(v,q)) = ∫( ∇(v)⊙∇(u) - q*(∇⋅u) - (∇⋅v)*p )*dΩ l((v,q)) = ∫( v⋅f - q*g )*dΩ op = AffineFEOperator(a,l,UxP,VxQ) - solver = LinearFESolver(BackslashSolver()) - uh, ph = solve(solver,op) - - eu = u - uh - ep = p - ph - - writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) - - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + if !isa(mfs,BlockMultiFieldStyle) # BlockMultiFieldStyle does not support BackslashSolver + solver = LinearFESolver(BackslashSolver()) + uh, ph = solve(solver,op) + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 + + writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) + end + + A = get_matrix(op) + xh = interpolate([u,p],UxP) + x = GridapDistributed.change_ghost(get_free_dof_values(xh),axes(A,2)) + uh1, ph1 = FESpaces.EvaluationFunction(UxP,x) + uh2, ph2 = FEFunction(UxP,x) + + @test l2_error(u,uh1,dΩ) < 1.0e-9 + @test l2_error(p,ph1,dΩ) < 1.0e-9 + @test l2_error(u,uh2,dΩ) < 1.0e-9 + @test l2_error(p,ph2,dΩ) < 1.0e-9 +end +function main(distribute, parts) + main(distribute, parts, ConsecutiveMultiFieldStyle()) + main(distribute, parts, BlockMultiFieldStyle()) end end # module diff --git a/test/StokesHdivDGTests.jl b/test/StokesHdivDGTests.jl index f3f19c78..f9d77494 100644 --- a/test/StokesHdivDGTests.jl +++ b/test/StokesHdivDGTests.jl @@ -34,84 +34,84 @@ function stokes_solution_2D(μ::Real) end function main(distribute,parts) - ranks = distribute(LinearIndices((prod(parts),))) - - μ = 1.0 - sol = stokes_solution_2D(μ) - u_ref = sol.u - f_ref = sol.f - σ_ref = sol.σ - - D = 2 - n = 4 - domain = Tuple(repeat([0,1],D)) - partition = (n,n) - model = CartesianDiscreteModel(ranks,parts,domain,partition) - - labels = get_face_labeling(model) - add_tag_from_tags!(labels,"dirichlet",[5,6,7]) - add_tag_from_tags!(labels,"neumann",[8,]) - - ############################################################################################ - order = 1 - reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) - V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") - U = TrialFESpace(V,u_ref) - - reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) - Q = TestFESpace(model,reffeₚ,conformity=:L2) - P = TrialFESpace(Q) - - Y = MultiFieldFESpace([V, Q]) - X = MultiFieldFESpace([U, P]) - - qdegree = 2*order+1 - Ω = Triangulation(model) - dΩ = Measure(Ω,qdegree) - - Γ = BoundaryTriangulation(model) - dΓ = Measure(Γ,qdegree) - n_Γ = get_normal_vector(Γ) - - Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) - dΓ_D = Measure(Γ_D,qdegree) - n_Γ_D = get_normal_vector(Γ_D) - - Γ_N = BoundaryTriangulation(model;tags="neumann") - dΓ_N = Measure(Γ_N,qdegree) - n_Γ_N = get_normal_vector(Γ_N) - - Λ = SkeletonTriangulation(model) - dΛ = Measure(Λ,qdegree) - n_Λ = get_normal_vector(Λ) - - h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) - h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) - h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) - - 
β_U = 50.0 - Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - - ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - - ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - - ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - - ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D - rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N - - penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D - penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D - - a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) - l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) - - op = AffineFEOperator(a,l,X,Y) - xh = solve(op) - - uh, ph = xh - err_u = l2_error(Ω,uh,sol.u) - err_p = l2_error(Ω,ph,sol.p) - tol = 1.0e-12 - @test err_u < tol - @test err_p < tol + ranks = distribute(LinearIndices((prod(parts),))) + + μ = 1.0 + sol = stokes_solution_2D(μ) + u_ref = sol.u + f_ref = sol.f + σ_ref = sol.σ + + D = 2 + n = 4 + domain = Tuple(repeat([0,1],D)) + partition = (n,n) + model = CartesianDiscreteModel(ranks,parts,domain,partition) + + labels = get_face_labeling(model) + add_tag_from_tags!(labels,"dirichlet",[5,6,7]) + add_tag_from_tags!(labels,"neumann",[8,]) + + ############################################################################################ + order = 1 + reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) + V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") + U = TrialFESpace(V,u_ref) + + reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) + Q = TestFESpace(model,reffeₚ,conformity=:L2) + P = TrialFESpace(Q) + + Y = MultiFieldFESpace([V, Q]) + X = MultiFieldFESpace([U, P]) + + qdegree = 2*order+1 + Ω = Triangulation(model) + dΩ = Measure(Ω,qdegree) + + Γ = BoundaryTriangulation(model) + dΓ = Measure(Γ,qdegree) + n_Γ = get_normal_vector(Γ) + + Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) + dΓ_D = Measure(Γ_D,qdegree) + n_Γ_D = get_normal_vector(Γ_D) + + Γ_N = BoundaryTriangulation(model;tags="neumann") + dΓ_N = Measure(Γ_N,qdegree) + n_Γ_N = get_normal_vector(Γ_N) + + Λ = SkeletonTriangulation(model) + dΛ = Measure(Λ,qdegree) + n_Λ = get_normal_vector(Λ) + + h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) + h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) + h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) + + β_U = 50.0 + Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - + ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - + ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - + ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - + ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D + rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N + + penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D + penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D + + a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) + l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) + + op = AffineFEOperator(a,l,X,Y) + xh = solve(op) + + uh, ph = xh + err_u = l2_error(Ω,uh,sol.u) + err_p = l2_error(Ω,ph,sol.p) + tol = 1.0e-12 + @test err_u < tol + @test err_p < tol end end # module diff --git a/test/TestApp/.gitignore b/test/TestApp/.gitignore new file mode 100644 index 00000000..ba39cc53 --- /dev/null +++ b/test/TestApp/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/test/TestApp/Project.toml b/test/TestApp/Project.toml index 9754ac89..31c314de 100644 --- a/test/TestApp/Project.toml +++ b/test/TestApp/Project.toml @@ -3,6 +3,7 @@ uuid = "3ba29202-0f57-4e69-8cbd-5c57d4c4860a" version = "0.1.0" [deps] +BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" Gridap = "56d4f2e9-7ea1-5844-9cf6-b9c51ca7ce8e" GridapDistributed = 
"f9701e48-63b3-45aa-9a63-9bc6c271f355" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/TestApp/src/TestApp.jl b/test/TestApp/src/TestApp.jl index f6c40936..2e5aed87 100644 --- a/test/TestApp/src/TestApp.jl +++ b/test/TestApp/src/TestApp.jl @@ -12,4 +12,5 @@ module TestApp include("../../HeatEquationTests.jl") include("../../StokesOpenBoundaryTests.jl") include("../../AdaptivityTests.jl") + include("../../BlockSparseMatrixAssemblersTests.jl") end \ No newline at end of file diff --git a/test/mpi/runtests_np4_body.jl b/test/mpi/runtests_np4_body.jl index 0e8fbb6a..70f9c01c 100644 --- a/test/mpi/runtests_np4_body.jl +++ b/test/mpi/runtests_np4_body.jl @@ -45,5 +45,8 @@ function all_tests(distribute,parts) PArrays.toc!(t,"Adaptivity") end + TestApp.BlockSparseMatrixAssemblersTests.main(distribute,parts) + PArrays.toc!(t,"BlockSparseMatrixAssemblers") + display(t) end diff --git a/test/sequential/BlockSparseMatrixAssemblersTests.jl b/test/sequential/BlockSparseMatrixAssemblersTests.jl new file mode 100644 index 00000000..9c60126d --- /dev/null +++ b/test/sequential/BlockSparseMatrixAssemblersTests.jl @@ -0,0 +1,17 @@ +module BlockSparseMatrixAssemblersTestsSeq +using PartitionedArrays +include("../BlockSparseMatrixAssemblersTests.jl") + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,2)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,1)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(1,2)) +end + +end # module \ No newline at end of file diff --git a/test/sequential/runtests.jl b/test/sequential/runtests.jl index 12f82145..57da04ef 100644 --- a/test/sequential/runtests.jl +++ b/test/sequential/runtests.jl @@ -34,5 +34,8 @@ end @time @testset "StokesHdivDGTests.jl" begin include("StokesHdivDGTests.jl") end +@time @testset "BlockSparseMatrixAssemblers" begin + include("BlockSparseMatrixAssemblersTests.jl") +end end # module