From 759d7c69fcb5a333789a1c0b36c713d969c9c774 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Sun, 18 Jun 2023 11:19:36 +0930
Subject: [PATCH 01/56] Allowed kwargs for MultiFieldFESpace

---
 src/MultiField.jl                        |  4 +-
 test/BlockSparseMatrixAssemblersTests.jl | 56 ++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 test/BlockSparseMatrixAssemblersTests.jl

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 0c33f88e..b25a088c 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -239,11 +239,11 @@ end
 # Factory

 function MultiField.MultiFieldFESpace(
-  f_dspace::Vector{<:DistributedSingleFieldFESpace})
+  f_dspace::Vector{<:DistributedSingleFieldFESpace};kwargs...)
   f_p_space = map(local_views,f_dspace)
   v(x...) = collect(x)
   p_f_space = map_parts(v,f_p_space...)
-  p_mspace = map_parts(MultiFieldFESpace,p_f_space)
+  p_mspace = map_parts(f->MultiFieldFESpace(f;kwargs...),p_f_space)
   gids = generate_multi_field_gids(f_dspace,p_mspace)
   vector_type = _find_vector_type(p_mspace,gids)
   DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type)
diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl
new file mode 100644
index 00000000..5eb1ef47
--- /dev/null
+++ b/test/BlockSparseMatrixAssemblersTests.jl
@@ -0,0 +1,56 @@
+using Test, LinearAlgebra
+
+using Gridap
+using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField
+
+using GridapDistributed
+using PartitionedArrays
+
+parts = get_part_ids(SequentialBackend(),(2,2))
+
+sol(x) = sum(x)
+
+model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(10,10))
+Ω = Triangulation(model)
+
+reffe = LagrangianRefFE(Float64,QUAD,1)
+V = FESpace(Ω, reffe; dirichlet_tags="boundary")
+U = TrialFESpace(sol,V)
+
+dΩ = Measure(Ω, 2)
+biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 - u1⋅v2)*dΩ
+liform((v1,v2)) = ∫(v1 - v2)*dΩ
+
+############################################################################################
+# Normal assembly
+
+Y = MultiFieldFESpace([V,V])
+X = MultiFieldFESpace([U,U])
+
+u = get_trial_fe_basis(X)
+v = get_fe_basis(Y)
+
+data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v))
+matdata = collect_cell_matrix(X,Y,biform(u,v))
+vecdata = collect_cell_vector(Y,liform(v))
+
+assem = SparseMatrixAssembler(X,Y)
+A1 = assemble_matrix(assem,matdata)
+b1 = assemble_vector(assem,vecdata)
+A2,b2 = assemble_matrix_and_vector(assem,data);
+
+############################################################################################
+# Block MultiFieldStyle
+
+mfs = BlockMultiFieldStyle()
+Yb = MultiFieldFESpace([V,V];style=mfs)
+Xb = MultiFieldFESpace([U,U];style=mfs)
+
+ub = get_trial_fe_basis(Xb)
+vb = get_fe_basis(Yb)
+
+bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb))
+bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb))
+bvecdata = collect_cell_vector(Yb,liform(vb))
+
+assem_blocks = SparseMatrixAssembler(Xb,Yb)

From 881604fb1e76f4c490f00292fa3179ee62cbc19f Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Mon, 19 Jun 2023 19:03:54 +1000
Subject: [PATCH 02/56] Added constructor for BlockMatrixAssemblers

---
 Project.toml                                  |  1 +
 src/FESpaces.jl                               | 19 +++++--
 src/GridapDistributed.jl                      |  1 +
 src/MultiField.jl                             | 50 +++++++++++++++++--
 ...Tests.jl => BlockMatrixAssemblersTests.jl} |  7 +--
 5 files changed, 67 insertions(+), 11 deletions(-)
 rename test/{BlockSparseMatrixAssemblersTests.jl => BlockMatrixAssemblersTests.jl} (89%)

diff --git a/Project.toml b/Project.toml
index 931fe904..6e75d63c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["S. Badia ", "A. F. Martin
-  A = typeof(map_parts(i->local_vector_type(undef,0),gids.partition))
-  B = typeof(gids)
-  vector_type = PVector{T,A,B}
+
+  if local_vector_type <: BlockVector
+    T = eltype(local_vector_type)
+    A = typeof(map_parts(i->Vector{T}(undef,0),gids.partition))
+    B = typeof(gids)
+    block_type = PVector{T,A,B}
+    vector_type = BlockVector{T,Vector{block_type}}
+  else
+    T = eltype(local_vector_type)
+    A = typeof(map_parts(i->local_vector_type(undef,0),gids.partition))
+    B = typeof(gids)
+    vector_type = PVector{T,A,B}
+  end
+
+  return vector_type
 end

 # Assembly
diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl
index 7fe7ad9d..58ee80ea 100644
--- a/src/GridapDistributed.jl
+++ b/src/GridapDistributed.jl
@@ -21,6 +21,7 @@ const PArrays = PartitionedArrays
 using SparseArrays
 using WriteVTK
 using FillArrays
+using BlockArrays

 import Gridap.TensorValues: inner, outer, double_contraction, symmetric_part
 import LinearAlgebra: det, tr, cross, dot, ⋅
diff --git a/src/MultiField.jl b/src/MultiField.jl
index b25a088c..faebdc20 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -29,20 +29,21 @@ local_views(a::Vector{<:DistributedCellField}) = [ai.fields for ai in a]

 """
 """
-struct DistributedMultiFieldFESpace{A,B,C,D} <: DistributedFESpace
+struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace
+  multi_field_style::MS
   field_fe_space::A
   part_fe_space::B
   gids::C
   vector_type::Type{D}
   function DistributedMultiFieldFESpace(
     field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace},
-    part_fe_space::AbstractPData{<:MultiFieldFESpace},
+    part_fe_space::AbstractPData{<:MultiFieldFESpace{MS}},
     gids::PRange,
-    vector_type::Type{D}) where D
+    vector_type::Type{D}) where {D,MS}
     A = typeof(field_fe_space)
     B = typeof(part_fe_space)
     C = typeof(gids)
-    new{A,B,C,D}(field_fe_space,part_fe_space,gids,vector_type)
+    new{MS,A,B,C,D}(MS(),field_fe_space,part_fe_space,gids,vector_type)
   end
 end

@@ -70,6 +71,10 @@ function MultiField.restrict_to_field(
   PVector(values,gids)
 end

+function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle})
+  return mortar(map(zero_free_values,f.field_fe_space))
+end
+
 function FESpaces.FEFunction(
   f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false)
   free_values = change_ghost(x,f.gids)
@@ -376,3 +381,40 @@ function propagate_to_ghost_multifield!(
     end
   end
 end
+
+
+# BlockMatrixAssemblers
+
+function FESpaces.SparseMatrixAssembler(
+  local_mat_type,
+  local_vec_type,
+  trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
+  test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
+  par_strategy=SubAssembledRows())
+
+  block_idx = CartesianIndices((length(test),length(trial)))
+  block_assemblers = map(block_idx) do idx
+    Yi = test[idx[1]]; Xj = trial[idx[2]]
+    return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy)
+  end
+
+  return BlockMatrixAssembler(block_assemblers)
+end
+
+function MultiField.select_block_matdata(matdata::AbstractPData,i::Integer,j::Integer)
+  map_parts(matdata) do matdata
+    MultiField.select_block_matdata(matdata,i,j)
+  end
+end
+
+function MultiField.select_block_vecdata(vecdata::AbstractPData,j::Integer)
+  map_parts(vecdata) do vecdata
+    MultiField.select_block_vecdata(vecdata,j)
+  end
+end
+
+function MultiField.select_block_matvecdata(matvecdata::AbstractPData,i::Integer,j::Integer)
+  map_parts(matvecdata) do matvecdata
+    MultiField.select_block_matvecdata(matvecdata,i,j)
+  end
+end
diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
similarity index 89%
rename from test/BlockSparseMatrixAssemblersTests.jl
rename to test/BlockMatrixAssemblersTests.jl
index 5eb1ef47..44f529eb 100644
--- a/test/BlockSparseMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -1,4 +1,4 @@
-using Test, LinearAlgebra
+using Test, LinearAlgebra, BlockArrays

 using Gridap
 using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField
@@ -10,11 +10,11 @@ parts = get_part_ids(SequentialBackend(),(2,2))

 sol(x) = sum(x)

-model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(10,10))
+model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(6,6))
 Ω = Triangulation(model)

 reffe = LagrangianRefFE(Float64,QUAD,1)
-V = FESpace(Ω, reffe; dirichlet_tags="boundary")
+V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

 dΩ = Measure(Ω, 2)
@@ -54,3 +54,4 @@ bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb))
 bvecdata = collect_cell_vector(Yb,liform(vb))

 assem_blocks = SparseMatrixAssembler(Xb,Yb)
+A_blocks = assemble_matrix(assem_blocks,bmatdata)

From 39c30090822b7c2473fe998c98a0a5b2caff45f7 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 20 Jun 2023 11:31:51 +1000
Subject: [PATCH 03/56] Added missing methods to Matrix/Vector builders

---
 src/Algebra.jl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/Algebra.jl b/src/Algebra.jl
index c14b214b..74118529 100644
--- a/src/Algebra.jl
+++ b/src/Algebra.jl
@@ -102,6 +102,12 @@ function Algebra.nz_counter(
   DistributedCounterCOO(builder.par_strategy,counters,rows,cols)
 end

+function Algebra.get_array_type(::PSparseMatrixBuilderCOO{Tv}) where Tv
+  T = eltype(Tv)
+  return PSparseMatrix{T}
+end
+
+
 """
 """
 struct DistributedCounterCOO{A,B,C,D} <: DistributedGridapType
@@ -386,6 +392,11 @@ function Algebra.nz_counter(builder::PVectorBuilder,axs::Tuple{<:PRange})
   PVectorCounter(builder.par_strategy,counters,rows)
 end

+function Algebra.get_array_type(::PVectorBuilder{Tv}) where Tv
+  T = eltype(Tv)
+  return PVector{T}
+end
+
 struct PVectorCounter{A,B,C}
   par_strategy::A
   counters::B

From e9ebafd86018649870e720a8b68ecfa97c2555b4 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 20 Jun 2023 11:32:31 +1000
Subject: [PATCH 04/56] Working version of distributed BlockMatrixAssemblers

---
 src/MultiField.jl                  | 10 ++++
 test/BlockMatrixAssemblersTests.jl | 77 ++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index faebdc20..137092b0 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -401,6 +401,16 @@ function FESpaces.SparseMatrixAssembler(
   return BlockMatrixAssembler(block_assemblers)
 end

+function FESpaces.SparseMatrixAssembler(
+  trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
+  test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
+  par_strategy=SubAssembledRows())
+  Tv = get_vector_type(get_part(local_views(first(trial))))
+  T  = eltype(Tv)
+  Tm = SparseMatrixCSC{T,Int}
+  SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy)
+end
+
 function MultiField.select_block_matdata(matdata::AbstractPData,i::Integer,j::Integer)
   map_parts(matdata) do matdata
     MultiField.select_block_matdata(matdata,i,j)
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 44f529eb..a99ccf92 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -18,8 +18,8 @@ V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

 dΩ = Measure(Ω, 2)
-biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 - u1⋅v2)*dΩ
-liform((v1,v2)) = ∫(v1 - v2)*dΩ
+biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ
+liform((v1,v2)) = ∫(v1 + v2)*dΩ

 ############################################################################################
 # Normal assembly
@@ -53,5 +53,76 @@ bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb))
 bvecdata = collect_cell_vector(Yb,liform(vb))

+############################################################################################
+# Block Assembly
+
+function same_vector(v1::PVector,v2::BlockVector,X)
+  v1i = map(i->restrict_to_field(X,v1,i),1:2)
+  for i in 1:length(v1i)
+    map_parts(v1i[i].owned_values,v2[Block(i)].owned_values) do v1,v2
+      @test (norm(v1 - v2) < 1.e-10)
+    end
+  end
+  return true
+end
+
+function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector)
+  o = one(eltype(A))
+  for i in blockaxes(A,1)
+    fill!(y[i],0.0)
+    for j in blockaxes(A,2)
+      mul!(y[i],A[i,j],x[j],o,o)
+    end
+  end
+end
+
 assem_blocks = SparseMatrixAssembler(Xb,Yb)
-A_blocks = assemble_matrix(assem_blocks,bmatdata)
+A1_blocks = assemble_matrix(assem_blocks,bmatdata);
+b1_blocks = assemble_vector(assem_blocks,bvecdata);
+
+y1_blocks = mortar(map(Aii->PVector(0.0,Aii.cols),A1_blocks.blocks[1,:]));
+x1_blocks = mortar(map(Aii->PVector(1.0,Aii.cols),A1_blocks.blocks[1,:]));
+
+mul!(y1_blocks,A1_blocks,x1_blocks)
+
+y1 = PVector(0.0,A1.cols)
+x1 = PVector(1.0,A1.cols)
+mul!(y1,A1,x1)
+
+@test same_vector(y1,y1_blocks,X)
+@test same_vector(b1,b1_blocks,Y)
+
+
+tests = []
+for i in blockaxes(A1_blocks,1)
+  for j in blockaxes(A1_blocks,2)
+    push!(tests,(oids_are_equal(y1_blocks[i].rows,A1_blocks[i,j].rows),
+                 oids_are_equal(A1_blocks[i,j].cols,x1_blocks[j].rows),
+                 hids_are_equal(A1_blocks[i,j].cols,x1_blocks[j].rows)))
+  end
+end
+
+A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata)
+@test A2_blocks ≈ A2
+@test b2_blocks ≈ b2
+
+A3_blocks = allocate_matrix(assem_blocks,bmatdata)
+b3_blocks = allocate_vector(assem_blocks,bvecdata)
+assemble_matrix!(A3_blocks,assem_blocks,bmatdata)
+assemble_vector!(b3_blocks,assem_blocks,bvecdata)
+@test A3_blocks ≈ A1_blocks
+@test b3_blocks ≈ b1_blocks
+
+A4_blocks, b4_blocks = allocate_matrix_and_vector(assem_blocks,bdata)
+assemble_matrix_and_vector!(A4_blocks,b4_blocks,assem_blocks,bdata)
+@test A4_blocks ≈ A2_blocks
+@test b4_blocks ≈ b2_blocks
+
+############################################################################################
+
+op = AffineFEOperator(biform,liform,X,Y)
+block_op = AffineFEOperator(biform,liform,Xb,Yb)
+
+@test get_matrix(op) ≈ get_matrix(block_op)
+@test get_vector(op) ≈ get_vector(block_op)

From 0f76d17d19a9e480633595d7e4c889edd6affde1 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 20 Jun 2023 16:11:46 +1000
Subject: [PATCH 05/56] Added empty block assembly filtering

---
 src/MultiField.jl                  | 30 +++++++++++++++++-----------
 test/BlockMatrixAssemblersTests.jl | 32 ++++--------------------------
 2 files changed, 22 insertions(+), 40 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 137092b0..63db9cdf 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -411,20 +411,26 @@ function FESpaces.SparseMatrixAssembler(
   SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy)
 end

-function MultiField.select_block_matdata(matdata::AbstractPData,i::Integer,j::Integer)
-  map_parts(matdata) do matdata
-    MultiField.select_block_matdata(matdata,i,j)
-  end
-end
-
-function MultiField.select_block_vecdata(vecdata::AbstractPData,j::Integer)
-  map_parts(vecdata) do vecdata
-    MultiField.select_block_vecdata(vecdata,j)
+# select_block_Xdata
+for fun in [:select_block_matdata,:select_block_vecdata,:select_block_matvecdata]
+  @eval begin
+    function MultiField.$fun(data::AbstractPData,s::Tuple)
+      map_parts(data) do data
+        MultiField.$fun(data,s)
+      end
+    end
   end
 end

-function MultiField.select_block_matvecdata(matvecdata::AbstractPData,i::Integer,j::Integer)
-  map_parts(matvecdata) do matvecdata
-    MultiField.select_block_matvecdata(matvecdata,i,j)
+# select_touched_blocks_Xdata
+for fun in [:select_touched_blocks_matdata,:select_touched_blocks_vecdata,:select_touched_blocks_matvecdata]
+  @eval begin
+    function MultiField.$fun(data::AbstractPData,s::Tuple)
+      touched = map_parts(data) do data
+        MultiField.$fun(data,s)
+      end
+      return get_part(touched)
+      #return reduce(.|,touched; init=fill(false,s))
+    end
   end
 end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index a99ccf92..5ed4bc8f 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -53,6 +53,10 @@ bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb))
 bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb))
 bvecdata = collect_cell_vector(Yb,liform(vb))

+
+touched = MultiField.select_touched_blocks_vecdata(bvecdata,(2,1))
+
+
 ############################################################################################
 # Block Assembly
@@ -93,36 +97,8 @@ mul!(y1,A1,x1)
 @test same_vector(y1,y1_blocks,X)
 @test same_vector(b1,b1_blocks,Y)

-
-tests = []
-for i in blockaxes(A1_blocks,1)
-  for j in blockaxes(A1_blocks,2)
-    push!(tests,(oids_are_equal(y1_blocks[i].rows,A1_blocks[i,j].rows),
-                 oids_are_equal(A1_blocks[i,j].cols,x1_blocks[j].rows),
-                 hids_are_equal(A1_blocks[i,j].cols,x1_blocks[j].rows)))
-  end
-end
-
-A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata)
-@test A2_blocks ≈ A2
-@test b2_blocks ≈ b2
-
-A3_blocks = allocate_matrix(assem_blocks,bmatdata)
-b3_blocks = allocate_vector(assem_blocks,bvecdata)
-assemble_matrix!(A3_blocks,assem_blocks,bmatdata)
-assemble_vector!(b3_blocks,assem_blocks,bvecdata)
-@test A3_blocks ≈ A1_blocks
-@test b3_blocks ≈ b1_blocks
-
-A4_blocks, b4_blocks = allocate_matrix_and_vector(assem_blocks,bdata)
-assemble_matrix_and_vector!(A4_blocks,b4_blocks,assem_blocks,bdata)
-@test A4_blocks ≈ A2_blocks
-@test b4_blocks ≈ b2_blocks
-
 ############################################################################################

 op = AffineFEOperator(biform,liform,X,Y)
 block_op = AffineFEOperator(biform,liform,Xb,Yb)
-@test get_matrix(op) ≈ get_matrix(block_op)
-@test get_vector(op) ≈ get_vector(block_op)

From 3ebe25116faad9252009c2313c7d1141a599e0fb Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 20 Jun 2023 16:31:10 +1000
Subject: [PATCH 06/56] Added zero_block for PSparseMatrix

---
 src/FESpaces.jl                    |  4 +--
 src/MultiField.jl                  | 39 +++++++++++++++++---------
 test/BlockMatrixAssemblersTests.jl |  4 ---
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/FESpaces.jl b/src/FESpaces.jl
index 1356893d..b566760c 100644
--- a/src/FESpaces.jl
+++ b/src/FESpaces.jl
@@ -486,8 +486,8 @@ function _find_vector_type(spaces,gids)
     T = eltype(local_vector_type)
     A = typeof(map_parts(i->Vector{T}(undef,0),gids.partition))
     B = typeof(gids)
-    block_type = PVector{T,A,B}
-    vector_type = BlockVector{T,Vector{block_type}}
+    vector_type = PVector{T,A,B}
+    #vector_type = BlockVector{T,Vector{block_type}}
   else
     T = eltype(local_vector_type)
     A = typeof(map_parts(i->local_vector_type(undef,0),gids.partition))
     B = typeof(gids)
diff --git a/src/MultiField.jl b/src/MultiField.jl
index 63db9cdf..58d1e872 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -71,9 +71,9 @@ function MultiField.restrict_to_field(
   PVector(values,gids)
 end

-function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle})
-  return mortar(map(zero_free_values,f.field_fe_space))
-end
+#function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle})
+#  return mortar(map(zero_free_values,f.field_fe_space))
+#end

 function FESpaces.FEFunction(
   f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false)
@@ -411,18 +411,24 @@ function FESpaces.SparseMatrixAssembler(
   SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy)
 end

-# select_block_Xdata
-for fun in [:select_block_matdata,:select_block_vecdata,:select_block_matvecdata]
-  @eval begin
-    function MultiField.$fun(data::AbstractPData,s::Tuple)
-      map_parts(data) do data
-        MultiField.$fun(data,s)
-      end
-    end
+function MultiField.select_block_matdata(matdata::AbstractPData,i::Integer,j::Integer)
+  map_parts(matdata) do matdata
+    MultiField.select_block_matdata(matdata,i,j)
+  end
+end
+
+function MultiField.select_block_vecdata(vecdata::AbstractPData,j::Integer)
+  map_parts(vecdata) do vecdata
+    MultiField.select_block_vecdata(vecdata,j)
+  end
+end
+
+function MultiField.select_block_matvecdata(matvecdata::AbstractPData,i::Integer,j::Integer)
+  map_parts(matvecdata) do matvecdata
+    MultiField.select_block_matvecdata(matvecdata,i,j)
   end
 end

-# select_touched_blocks_Xdata
 for fun in [:select_touched_blocks_matdata,:select_touched_blocks_vecdata,:select_touched_blocks_matvecdata]
   @eval begin
     function MultiField.$fun(data::AbstractPData,s::Tuple)
       touched = map_parts(data) do data
         MultiField.$fun(data,s)
       end
       return get_part(touched)
       #return reduce(.|,touched; init=fill(false,s))
     end
   end
 end
+
+function MultiField.zero_block(::Type{<:PSparseMatrix},a::DistributedSparseMatrixAssembler)
+  rows = get_rows(a)
+  cols = get_cols(a)
+  mats = map_parts(local_views(a)) do a
+    MultiField.zero_block(get_matrix_type(a),a)
+  end
+  return PSparseMatrix(mats,rows,cols)
+end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 5ed4bc8f..00dd64ad 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -53,10 +53,6 @@ bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb))
 bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb))
 bvecdata = collect_cell_vector(Yb,liform(vb))

-
-touched = MultiField.select_touched_blocks_vecdata(bvecdata,(2,1))
-
-
 ############################################################################################
 # Block Assembly

From 0507872447cffffcfac62de772471abf804fc8f7 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Wed, 21 Jun 2023 14:56:17 +1000
Subject: [PATCH 07/56] Block-Vector matrix multiplication works

---
 src/MultiField.jl                  |  9 +++---
 test/BlockMatrixAssemblersTests.jl | 50 ++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 58d1e872..0ae89f21 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -432,10 +432,11 @@ for fun in [:select_touched_blocks_matdata,:select_touched_blocks_vecdata,:select_touched_blocks_matvecdata]
   @eval begin
     function MultiField.$fun(data::AbstractPData,s::Tuple)
-      touched = map_parts(data) do data
-        MultiField.$fun(data,s)
-      end
-      return get_part(touched)
+      return fill(true,s)
+      #touched = map_parts(data) do data
+      #  MultiField.$fun(data,s)
+      #end
+      #return get_part(touched)
       #return reduce(.|,touched; init=fill(false,s))
     end
   end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 00dd64ad..2b3b76b8 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -10,14 +10,14 @@ parts = get_part_ids(SequentialBackend(),(2,2))

 sol(x) = sum(x)

-model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(6,6))
+model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(12,12))
 Ω = Triangulation(model)

 reffe = LagrangianRefFE(Float64,QUAD,1)
 V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

-dΩ = Measure(Ω, 2)
+dΩ = Measure(Ω, 4)
 biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ
 liform((v1,v2)) = ∫(v1 + v2)*dΩ
@@ -34,11 +34,14 @@ data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v))
 matdata = collect_cell_matrix(X,Y,biform(u,v))
 vecdata = collect_cell_vector(Y,liform(v))

-assem = SparseMatrixAssembler(X,Y)
+assem = SparseMatrixAssembler(X,Y,FullyAssembledRows())
 A1 = assemble_matrix(assem,matdata)
 b1 = assemble_vector(assem,vecdata)
 A2,b2 = assemble_matrix_and_vector(assem,data);

+assem11 = SparseMatrixAssembler(U,V,FullyAssembledRows())
+A11 = assemble_matrix((u1,v1)->∫(∇(u1)⋅∇(v1))*dΩ,assem11,U,V)
+
 ############################################################################################
 # Block MultiFieldStyle
@@ -56,19 +59,20 @@ bvecdata = collect_cell_vector(Yb,liform(vb))
 ############################################################################################
 # Block Assembly

-function same_vector(v1::PVector,v2::BlockVector,X)
-  v1i = map(i->restrict_to_field(X,v1,i),1:2)
-  for i in 1:length(v1i)
-    map_parts(v1i[i].owned_values,v2[Block(i)].owned_values) do v1,v2
-      @test (norm(v1 - v2) < 1.e-10)
-    end
+function same_solution(x1::PVector,x2::BlockVector,X,dΩ)
+  u1 = [FEFunction(X,x1)...]
+  u2 = map(i->FEFunction(X[i],x2[Block(i)]),1:blocklength(x2))
+
+  err = map(u1,u2) do u1,u2
+    eh = u1-u2
+    return sum(∫(eh⋅eh)dΩ)
   end
-  return true
+  return err
 end

 function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector)
   o = one(eltype(A))
-  for i in blockaxes(A,1)
+  for i in blockaxes(A,2)
     fill!(y[i],0.0)
     for j in blockaxes(A,2)
       mul!(y[i],A[i,j],x[j],o,o)
     end
   end
 end

+function test_axes(c::BlockVector,a::BlockMatrix,b::BlockVector)
+  tests = []
+  for i in blockaxes(a,1)
+    for j in blockaxes(a,2)
+      push!(tests,
+            (oids_are_equal(c[i].rows,a[i,j].rows),
+             oids_are_equal(a[i,j].cols,b[j].rows),
+             hids_are_equal(a[i,j].cols,b[j].rows)))
+    end
+  end
+  return tests
+end
+
 assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows())

 A1_blocks = assemble_matrix(assem_blocks,bmatdata);
 b1_blocks = assemble_vector(assem_blocks,bvecdata);

-y1_blocks = mortar(map(Aii->PVector(0.0,Aii.cols),A1_blocks.blocks[1,:]));
+y1_blocks = mortar(map(Aii->PVector(0.0,Aii.rows),A1_blocks.blocks[:,1]));
 x1_blocks = mortar(map(Aii->PVector(1.0,Aii.cols),A1_blocks.blocks[1,:]));

+test_axes(y1_blocks,A1_blocks,x1_blocks)
 mul!(y1_blocks,A1_blocks,x1_blocks)

-y1 = PVector(0.0,A1.cols)
+y1 = PVector(0.0,A1.rows)
 x1 = PVector(1.0,A1.cols)
 mul!(y1,A1,x1)

-@test same_vector(y1,y1_blocks,X)
-@test same_vector(b1,b1_blocks,Y)
+@test all(same_solution(y1,y1_blocks,X,dΩ) .< 1e-10)

 ############################################################################################

 op = AffineFEOperator(biform,liform,X,Y)
 block_op = AffineFEOperator(biform,liform,Xb,Yb)
-

From 78a53da0cb36ff1193c0caf62dc6fb6f7065415f Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Wed, 21 Jun 2023 17:18:00 +1000
Subject: [PATCH 08/56] AffineFEOperators working for Block assemblers

---
 src/MultiField.jl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 0ae89f21..a02611eb 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -429,6 +429,18 @@ function MultiField.select_block_matvecdata(matvecdata::AbstractPData,i::Integer
   end
 end

+function MultiField.combine_matdata(data1::AbstractPData,data2::AbstractPData)
+  map_parts(data1,data2) do data1,data2
+    MultiField.combine_matdata(data1,data2)
+  end
+end
+
+function MultiField.recombine_data(matvecdata::AbstractPData,matdata::AbstractPData,vecdata::AbstractPData)
+  map_parts(matvecdata,matdata,vecdata) do matvecdata,matdata,vecdata
+    (matvecdata,matdata,vecdata)
+  end
+end
+
 for fun in [:select_touched_blocks_matdata,:select_touched_blocks_vecdata,:select_touched_blocks_matvecdata]
   @eval begin
     function MultiField.$fun(data::AbstractPData,s::Tuple)

From 409c7ad2db7f703a37fedd30ba670601176b32fa Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Sun, 2 Jul 2023 19:37:45 +1000
Subject: [PATCH 09/56] Started implementing the new version of block
 assemblers

---
 src/MultiField.jl                  | 89 ++++++++++++++++++------------
 test/BlockMatrixAssemblersTests.jl | 20 +++++++
 2 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index a02611eb..17fb301c 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -383,7 +383,7 @@ function propagate_to_ghost_multifield!(
 end

-# BlockMatrixAssemblers
+# BlockSparseMatrixAssemblers

 function FESpaces.SparseMatrixAssembler(
   local_mat_type,
   local_vec_type,
@@ -398,7 +398,7 @@ function FESpaces.SparseMatrixAssembler(
     return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy)
   end

-  return BlockMatrixAssembler(block_assemblers)
+  return MultiField.BlockSparseMatrixAssembler(block_assemblers)
 end

 function FESpaces.SparseMatrixAssembler(
   trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
   test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
   par_strategy=SubAssembledRows())
   Tv = get_vector_type(get_part(local_views(first(trial))))
   T  = eltype(Tv)
   Tm = SparseMatrixCSC{T,Int}
   SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy)
 end

-function MultiField.select_block_matdata(matdata::AbstractPData,i::Integer,j::Integer)
-  map_parts(matdata) do matdata
-    MultiField.select_block_matdata(matdata,i,j)
+function local_views(a::MultiField.BlockSparseMatrixAssembler{<:DistributedSparseMatrixAssembler})
+  assems = a.block_assemblers
+  parts  = get_part_ids(local_views(first(assems)))
+  map_parts(parts) do p
+    idx = CartesianIndices(axes(assems))
+    block_assems = map(idx) do I
+      get_part(local_views(assems[I]),p)
+    end
+    return MultiField.BlockSparseMatrixAssembler(block_assems)
   end
 end

-function MultiField.select_block_vecdata(vecdata::AbstractPData,j::Integer)
-  map_parts(vecdata) do vecdata
-    MultiField.select_block_vecdata(vecdata,j)
+function local_views(a::MatrixBlock,rows,cols)
+  parts = get_part_ids(local_views(first(a.array)))
+  map_parts(parts) do p
+    idx = CartesianIndices(axes(a))
+    array = map(idx) do I
+      get_part(local_views(a[I],rows[I[1]],cols[I[2]]),p)
+    end
+    ArrayBlock(array,a.touched)
   end
 end

-function MultiField.select_block_matvecdata(matvecdata::AbstractPData,i::Integer,j::Integer)
-  map_parts(matvecdata) do matvecdata
-    MultiField.select_block_matvecdata(matvecdata,i,j)
+function local_views(a::VectorBlock,rows)
+  parts = get_part_ids(local_views(first(a.array)))
+  map_parts(parts) do p
+    idx = CartesianIndices(axes(a))
+    array = map(idx) do I
+      get_part(local_views(a[I],rows[I]),p)
+    end
+    ArrayBlock(array,a.touched)
   end
 end

-function MultiField.combine_matdata(data1::AbstractPData,data2::AbstractPData)
-  map_parts(data1,data2) do data1,data2
-    MultiField.combine_matdata(data1,data2)
-  end
-end
-
-function MultiField.recombine_data(matvecdata::AbstractPData,matdata::AbstractPData,vecdata::AbstractPData)
-  map_parts(matvecdata,matdata,vecdata) do matvecdata,matdata,vecdata
-    (matvecdata,matdata,vecdata)
-  end
-end
-
-for fun in [:select_touched_blocks_matdata,:select_touched_blocks_vecdata,:select_touched_blocks_matvecdata]
-  @eval begin
-    function MultiField.$fun(data::AbstractPData,s::Tuple)
-      return fill(true,s)
-      #touched = map_parts(data) do data
-      #  MultiField.$fun(data,s)
-      #end
-      #return get_part(touched)
-      #return reduce(.|,touched; init=fill(false,s))
-    end
-  end
-end
-
-function MultiField.zero_block(::Type{<:PSparseMatrix},a::DistributedSparseMatrixAssembler)
-  rows = get_rows(a)
-  cols = get_cols(a)
-  mats = map_parts(local_views(a)) do a
-    MultiField.zero_block(get_matrix_type(a),a)
-  end
-  return PSparseMatrix(mats,rows,cols)
-end
+
+#! The following functions could be avoided if we created an abstract superclass for
+#! DistributedSparseMatrixAssembler
+function FESpaces.symbolic_loop_matrix!(A,a::MultiField.BlockSparseMatrixAssembler,matdata::AbstractPData)
+  rows = get_rows(a)
+  cols = get_cols(a)
+  map_parts(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata)
+end
+
+function FESpaces.numeric_loop_matrix!(A,a::MultiField.BlockSparseMatrixAssembler,matdata::AbstractPData)
+  rows = get_rows(a)
+  cols = get_cols(a)
+  map_parts(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata)
+end
+
+function FESpaces.symbolic_loop_vector!(b,a::MultiField.BlockSparseMatrixAssembler,vecdata::AbstractPData)
+  rows = get_rows(a)
+  map_parts(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata)
+end
+
+function FESpaces.numeric_loop_vector!(b,a::MultiField.BlockSparseMatrixAssembler,vecdata::AbstractPData)
+  rows = get_rows(a)
+  map_parts(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata)
+end
+
+function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::MultiField.BlockSparseMatrixAssembler,data::AbstractPData)
+  rows = get_rows(a)
+  cols = get_cols(a)
+  map_parts(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)
+end
+
+function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::MultiField.BlockSparseMatrixAssembler,data::AbstractPData)
+  rows = get_rows(a)
+  cols = get_cols(a)
+  map_parts(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)
+end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 2b3b76b8..ad0ed936 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -95,6 +95,26 @@ end

 assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows())

+using Gridap.Fields: ArrayBlock, MatrixBlock, VectorBlock
+using Gridap.FESpaces: nz_counter, nz_allocation,create_from_nz
+using Gridap.FESpaces: symbolic_loop_matrix!, numeric_loop_matrix!
+using Gridap.Helpers
+using Gridap.FESpaces: get_assembly_strategy
+
+mat_builders = get_matrix_builder(assem_blocks)
+rows = get_rows(assem_blocks)
+cols = get_cols(assem_blocks)
+
+m1 = nz_counter(mat_builders,(rows,cols))
+symbolic_loop_matrix!(m1,assem_blocks,bmatdata)
+m2 = nz_allocation(m1)
+numeric_loop_matrix!(m2,assem_blocks,bmatdata)
+m3 = create_from_nz(m2)
+
+
+strat = get_assembly_strategy(assem_blocks)
+
+
 A1_blocks = assemble_matrix(assem_blocks,bmatdata);
 b1_blocks = assemble_vector(assem_blocks,bvecdata);

From 669fc5438742ade1639c68a5fbdf12f022ca1ebd Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Sun, 2 Jul 2023 20:31:09 +0930
Subject: [PATCH 10/56] Working version

---
 test/BlockMatrixAssemblersTests.jl | 38 ++++++++----------------------
 1 file changed, 10 insertions(+), 28 deletions(-)

diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index ad0ed936..fc729b6e 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -18,7 +18,7 @@ V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

 dΩ = Measure(Ω, 4)
-biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ
+biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ
 liform((v1,v2)) = ∫(v1 + v2)*dΩ
@@ -81,45 +81,27 @@ function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector)
 end

 function test_axes(c::BlockVector,a::BlockMatrix,b::BlockVector)
-  tests = []
+  res = Matrix(undef,blocksize(a)...)
   for i in blockaxes(a,1)
     for j in blockaxes(a,2)
-      push!(tests,
-            (oids_are_equal(c[i].rows,a[i,j].rows),
+      res[i.n[1],j.n[1]] = Tuple([oids_are_equal(c[i].rows,a[i,j].rows),
              oids_are_equal(a[i,j].cols,b[j].rows),
-             hids_are_equal(a[i,j].cols,b[j].rows)))
+             hids_are_equal(a[i,j].cols,b[j].rows)])
     end
   end
-  return tests
+  return res
 end

-using Gridap.Fields: ArrayBlock, MatrixBlock, VectorBlock
-using Gridap.FESpaces: nz_counter, nz_allocation,create_from_nz
-using Gridap.FESpaces: symbolic_loop_matrix!, numeric_loop_matrix!
-using Gridap.Helpers
-using Gridap.FESpaces: get_assembly_strategy
-
-mat_builders = get_matrix_builder(assem_blocks)
-rows = get_rows(assem_blocks)
-cols = get_cols(assem_blocks)
-
-m1 = nz_counter(mat_builders,(rows,cols))
-symbolic_loop_matrix!(m1,assem_blocks,bmatdata)
-m2 = nz_allocation(m1)
-numeric_loop_matrix!(m2,assem_blocks,bmatdata)
-m3 = create_from_nz(m2)
-
-
-strat = get_assembly_strategy(assem_blocks)
-
+#! TODO: Does not work if there are empty blocks due to PRange checks when multiplying.
+#! Maybe we should change to MatrixBlocks?

+assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows())

 A1_blocks = assemble_matrix(assem_blocks,bmatdata);
 b1_blocks = assemble_vector(assem_blocks,bvecdata);

-y1_blocks = mortar(map(Aii->PVector(0.0,Aii.rows),A1_blocks.blocks[:,1]));
-x1_blocks = mortar(map(Aii->PVector(1.0,Aii.cols),A1_blocks.blocks[1,:]));
+y1_blocks = mortar(map(Aii->PVector(0.0,Aii.rows),diag(A1_blocks.blocks)));
+x1_blocks = mortar(map(Aii->PVector(1.0,Aii.cols),diag(A1_blocks.blocks)));

 test_axes(y1_blocks,A1_blocks,x1_blocks)
 mul!(y1_blocks,A1_blocks,x1_blocks)

From fe07a312e640c18c63e9dd076400d0a12299a58b Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 4 Jul 2023 21:59:20 +0800
Subject: [PATCH 11/56] Updated with Gridap changes

---
 src/MultiField.jl                  | 25 +++++++++++++++++--------
 test/BlockMatrixAssemblersTests.jl |  4 ++--
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 17fb301c..3355e0c9 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -388,17 +388,26 @@ end
 function FESpaces.SparseMatrixAssembler(
   local_mat_type,
   local_vec_type,
-  trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
-  test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},
-  par_strategy=SubAssembledRows())
+  trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}},
+  test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}},
+  par_strategy=SubAssembledRows()) where {NB,SB,P}
+
+  # Build block spaces #! TODO: Eliminate this, build the PRANGES directly
+  function get_block_fespace(spaces,range)
+    (length(range) == 1) ? spaces[range[1]] : MultiFieldFESpace(spaces[range])
+  end
+  NV = length(trial.field_fe_space)
+  block_ranges = MultiField.get_block_ranges(NB,SB,P)
+  block_tests  = map(range -> get_block_fespace(test.field_fe_space,range),block_ranges)
+  block_trials = map(range -> get_block_fespace(trial.field_fe_space,range),block_ranges)

   block_idx = CartesianIndices((length(test),length(trial)))
   block_assemblers = map(block_idx) do idx
-    Yi = test[idx[1]]; Xj = trial[idx[2]]
+    Yi = block_tests[idx[1]]; Xj = block_trials[idx[2]]
     return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy)
   end

-  return MultiField.BlockSparseMatrixAssembler(block_assemblers)
+  return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers)
 end

 function FESpaces.SparseMatrixAssembler(
@@ -420,7 +420,7 @@ end

-function local_views(a::MultiField.BlockSparseMatrixAssembler{<:DistributedSparseMatrixAssembler})
+function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P}
   assems = a.block_assemblers
   parts = get_part_ids(local_views(first(assems)))
   map_parts(parts) do p
     idx = CartesianIndices(axes(assems))
     block_assems = map(idx) do I
       get_part(local_views(assems[I]),p)
     end
-    return MultiField.BlockSparseMatrixAssembler(block_assems)
+    return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assems)
   end
 end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index fc729b6e..9180dd2d 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -18,7 +18,7 @@ V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

 dΩ = Measure(Ω, 4)
-biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ
-liform((v1,v2)) = ∫(v1 + v2)*dΩ
+biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ
+liform((v1,v2)) = ∫(v1 + v2)*dΩ
@@ -46,8 +46,8 @@
 mfs = BlockMultiFieldStyle()
-Yb = MultiFieldFESpace([V,V];style=mfs)
-Xb = MultiFieldFESpace([U,U];style=mfs)
+Yb = MultiFieldFESpace([V,V];style=mfs)
+Xb = MultiFieldFESpace([U,U];style=mfs)

 ub = get_trial_fe_basis(Xb)
 vb = get_fe_basis(Yb)

From b6c120cc1b11b00e139fde32eb0f48672f07557c Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 4 Jul 2023 22:17:25 +0800
Subject: [PATCH 12/56] Combined-block assembly working

---
 src/MultiField.jl                  |  9 ++++++++-
 test/BlockMatrixAssemblersTests.jl | 29 +++++++++++++++++----------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index 3355e0c9..ea50785b 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -401,7 +401,7 @@ function FESpaces.SparseMatrixAssembler(
   block_tests  = map(range -> get_block_fespace(test.field_fe_space,range),block_ranges)
   block_trials = map(range -> get_block_fespace(trial.field_fe_space,range),block_ranges)

-  block_idx = CartesianIndices((length(test),length(trial)))
+  block_idx = CartesianIndices((NB,NB))
   block_assemblers = map(block_idx) do idx
     Yi = block_tests[idx[1]]; Xj = block_trials[idx[2]]
     return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy)
   end
@@ -454,6 +454,13 @@ function local_views(a::VectorBlock,rows)
   end
 end

+function local_views(a::MultiField.ArrayBlockView,axes...)
+  array = local_views(a.array,axes...)
+  map_parts(array) do array
+    MultiField.ArrayBlockView(array,a.block_map)
+  end
+end
+
 #! The following functions could be avoided if we created an abstract superclass for
 #! DistributedSparseMatrixAssembler
 function FESpaces.symbolic_loop_matrix!(A,a::MultiField.BlockSparseMatrixAssembler,matdata::AbstractPData)
   rows = get_rows(a)
   cols = get_cols(a)
   map_parts(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata)
 end
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 9180dd2d..3a48236f 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -18,14 +18,14 @@ V = FESpace(Ω, reffe)
 U = TrialFESpace(sol,V)

 dΩ = Measure(Ω, 4)
-biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ
-liform((v1,v2)) = ∫(v1 + v2)*dΩ
+biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3 + v3⋅u1 - v1⋅u3)*dΩ
+liform((v1,v2,v3)) = ∫(v1 + v2 - v3)*dΩ

 ############################################################################################
 # Normal assembly

-Y = MultiFieldFESpace([V,V])
-X = MultiFieldFESpace([U,U])
+Y = MultiFieldFESpace([V,V,V])
+X = MultiFieldFESpace([U,U,U])

 u = get_trial_fe_basis(X)
 v = get_fe_basis(Y)
@@ -45,9 +45,11 @@
 # Block MultiFieldStyle

-mfs = BlockMultiFieldStyle()
+#mfs = BlockMultiFieldStyle()
+mfs = BlockMultiFieldStyle(2,(1,2))

-Yb = MultiFieldFESpace([V,V];style=mfs)
-Xb = MultiFieldFESpace([U,U];style=mfs)
+Yb = MultiFieldFESpace([V,V,V];style=mfs)
+Xb = MultiFieldFESpace([U,U,U];style=mfs)

 ub = get_trial_fe_basis(Xb)
 vb = get_fe_basis(Yb)
@@ -61,9 +61,9 @@
 ############################################################################################
 # Block Assembly

-function same_solution(x1::PVector,x2::BlockVector,X,dΩ)
+function same_solution(x1::PVector,x2::BlockVector,X,Xi,dΩ)
   u1 = [FEFunction(X,x1)...]
-  u2 = map(i->FEFunction(X[i],x2[Block(i)]),1:blocklength(x2))
+  u2 = map(i->FEFunction(Xi[i],x2[Block(i)]),1:blocklength(x2))

   err = map(u1,u2) do u1,u2
     eh = u1-u2
     return sum(∫(eh⋅eh)dΩ)
   end
   return err
 end
@@ -94,6 +94,13 @@
+function get_block_fespace(spaces,range)
+  (length(range) == 1) ? spaces[range[1]] : MultiFieldFESpace(spaces[range])
+end
+
+block_ranges = Gridap.MultiField.get_block_ranges(2,(1,2),(1,2,3))
+block_trials = map(range -> get_block_fespace(X.field_fe_space,range),block_ranges)
+
 #! TODO: Does not work if there are empty blocks due to PRange checks when multiplying.
 #! Maybe we should change to MatrixBlocks?
@@ -119,7 +119,7 @@
-@test all(same_solution(y1,y1_blocks,X,dΩ) .< 1e-10)
+@test all(same_solution(y1,y1_blocks,X,block_trials,dΩ) .< 1e-5)

 ############################################################################################

 op = AffineFEOperator(biform,liform,X,Y)
 block_op = AffineFEOperator(biform,liform,Xb,Yb)

From 2c6879d636f55ad2a0e89f21a02611eb17fb301c Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Wed, 5 Jul 2023 19:21:52 +1000
Subject: [PATCH 13/56] Added temporary fix to ensure PRanges are same for all
 blocks

---
 src/MultiField.jl                  | 89 ++++++++++++++++++++++++++++++
 test/BlockMatrixAssemblersTests.jl | 10 +++-
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/src/MultiField.jl b/src/MultiField.jl
index ea50785b..3043af5c 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -497,3 +497,92 @@ function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::MultiField.BlockSparseMatrixAssembler,data::AbstractPData)
   cols = get_cols(a)
   map_parts(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data)
 end
+
+#! The following is horrible (see discussion in PR) but necessary for the moment. We will be
+#! bringing potentially too many ghosts from other procs. This will be dealt with in the future,
+#! but requires a little bit of refactoring of the assembly code. Postponed until GridapDistributed v0.3.
+
+function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}})
+  array = map(_fa_create_from_nz_temporary_fix,a.array)
+  return mortar(array)
+end
+
+function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAssembledRows})
+  parts = get_part_ids(local_views(a))
+
+  rdofs = a.rows # dof ids of the test space
+  cdofs = a.cols # dof ids of the trial space
+  ngrdofs = length(rdofs)
+  ngcdofs = length(cdofs)
+  nordofs = map_parts(num_oids,rdofs.partition)
+  nocdofs = map_parts(num_oids,cdofs.partition)
+  first_grdof = map_parts(first_gdof_from_ids,rdofs.partition)
+  first_gcdof = map_parts(first_gdof_from_ids,cdofs.partition)
+  cneigs_snd = cdofs.exchanger.parts_snd
+  cneigs_rcv = cdofs.exchanger.parts_rcv
+
+  hcol_to_gid  = map_parts(part -> part.lid_to_gid[part.hid_to_lid], cdofs.partition)
+  hcol_to_part = map_parts(part -> part.lid_to_part[part.hid_to_lid], cdofs.partition)
+
+  rows = PRange(
+    parts,
+    ngrdofs,
+    nordofs,
+    first_grdof)
+
+  cols = PRange(
+    parts,
+    ngcdofs,
+    nocdofs,
+    first_gcdof,
+    hcol_to_gid,
+    hcol_to_part,
+    cneigs_snd,
+    cneigs_rcv)
+
+  I,J,C = map_parts(a.allocs) do alloc
+    alloc.I, alloc.J, alloc.V
+  end
+  to_gids!(I,rdofs)
+  to_gids!(J,cdofs)
+  to_lids!(I,rows)
+  to_lids!(J,cols)
+
+  b = change_axes(a,(rows,cols))
+
+  values = map_parts(Algebra.create_from_nz,local_views(b))
+  exchanger = empty_exchanger(parts)
+  return PSparseMatrix(values,rows,cols,exchanger)
+end
+
+"""
+function Algebra.nz_allocation(a::ArrayBlock{<:DistributedCounterCOO})
+  array = map(Algebra.nz_allocation,a.array)
+  match_block_indexes!(array)
+  return ArrayBlock(array,a.touched)
+end
+
+function match_block_indexes!(allocators::Vector{<:DistributedAllocationCOO})
+  return allocators
+end
+
+function match_block_indexes!(allocators::Matrix{<:DistributedAllocationCOO})
+  s = size(allocators)
+
+  # Get an AbstractPData containing in each part the the matrix of local allocators
+  parts = get_part_ids(local_views(first(allocators)))
+  allocs = map_parts(parts) do p
+    idx = CartesianIndices(s)
+    allocs = map(idx) do I
+      get_part(local_views(a[I],rows[I[1]],cols[I[2]]),p)
+    end
+    return allocs
+  end
+
+  # Accumulate the index sets for each
+  map_parts()
+  for block_row in 1:s[1]
+
+  end
+end
+"""
\ No newline at end of file
diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockMatrixAssemblersTests.jl
index 3a48236f..d4bb7b7a 100644
--- a/test/BlockMatrixAssemblersTests.jl
+++ b/test/BlockMatrixAssemblersTests.jl
@@ -1,4 +1,4 @@
-using Test, LinearAlgebra, BlockArrays
+using Test, LinearAlgebra, BlockArrays, SparseArrays

 using Gridap
 using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField
@@ -18,7 +18,7 @@
 dΩ = Measure(Ω, 4)
-biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3 + v3⋅u1 - v1⋅u3)*dΩ
+biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3)*dΩ # + v3⋅u1 - v1⋅u3)*dΩ
 liform((v1,v2,v3)) = ∫(v1 + v2 - v3)*dΩ
@@ -125,3 +125,9 @@ mul!(y1,A1,x1)

 op = AffineFEOperator(biform,liform,X,Y)
 block_op = AffineFEOperator(biform,liform,Xb,Yb)
+
+
+A11 = A1_blocks.blocks[1,1]
+A12 = A1_blocks.blocks[1,2]
+A22 = A1_blocks.blocks[2,2]
+

From 9b1620cae019d2209653cd05c221af3100820ee8 Mon Sep 17 00:00:00 2001
From: JordiManyer Date: Sun, 13 Aug 2023 12:03:50 +1000 Subject: [PATCH 14/56] First round of fixes --- src/FESpaces.jl | 22 ++++++++---- src/MultiField.jl | 89 +++++++++++++++-------------------------------- 2 files changed, 45 insertions(+), 66 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index bf967531..bb740342 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -600,27 +600,37 @@ FESpaces.get_vector_builder(a::DistributedSparseMatrixAssembler) = a.vector_buil FESpaces.get_assembly_strategy(a::DistributedSparseMatrixAssembler) = a.strategy function FESpaces.symbolic_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(symbolic_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.numeric_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(numeric_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.symbolic_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(symbolic_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.numeric_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(numeric_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(symbolic_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(numeric_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end # Parallel Assembly strategies diff --git a/src/MultiField.jl b/src/MultiField.jl index f8465f5a..66ac5229 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -395,6 +395,9 @@ end # BlockSparseMatrixAssemblers +const DistributedBlockSparseMatrixAssembler{NB,NV,SB,P} = + MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P,<:DistributedSparseMatrixAssembler} + function FESpaces.SparseMatrixAssembler( local_mat_type, local_vec_type, @@ -433,7 +436,7 @@ end function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} assems = a.block_assemblers parts = get_part_ids(local_views(first(assems))) - map_parts(parts) do p + map(parts) do p idx = CartesianIndices(axes(assems)) block_assems = map(idx) do I get_part(local_views(assems[I]),p) @@ -444,7 +447,7 @@ end function local_views(a::MatrixBlock,rows,cols) parts = get_part_ids(local_views(first(a.array))) - map_parts(parts) do p + map(parts) do p idx = CartesianIndices(axes(a)) array = map(idx) do I 
get_part(local_views(a[I],rows[I[1]],cols[I[2]]),p) @@ -455,7 +458,7 @@ end function local_views(a::VectorBlock,rows) parts = get_part_ids(local_views(first(a.array))) - map_parts(parts) do p + map(parts) do p idx = CartesianIndices(axes(a)) array = map(idx) do I get_part(local_views(a[I],rows[I]),p) @@ -464,48 +467,46 @@ function local_views(a::VectorBlock,rows) end end -function local_views(a::MultiField.ArrayBlockView,axes...) +function local_views(a::ArrayBlockView,axes...) array = local_views(a.array,axes...) - map_parts(array) do array + map(array) do array MultiField.ArrayBlockView(array,a.block_map) end end - -#! The following functions could be avoided if we created an abstract superclass for -#! DistributedSparseMatrixAssembler -function FESpaces.symbolic_loop_matrix!(A,a::MultiField.BlockSparseMatrixAssembler,matdata::AbstractPData) +#! The following could be avoided if DistributedBlockSparseMatrixAssembler <: DistributedSparseMatrixAssembler +function FESpaces.symbolic_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) rows = get_rows(a) cols = get_cols(a) - map_parts(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end -function FESpaces.numeric_loop_matrix!(A,a::MultiField.BlockSparseMatrixAssembler,matdata::AbstractPData) +function FESpaces.numeric_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) rows = get_rows(a) cols = get_cols(a) - map_parts(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end -function FESpaces.symbolic_loop_vector!(b,a::MultiField.BlockSparseMatrixAssembler,vecdata::AbstractPData) +function FESpaces.symbolic_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) rows = get_rows(a) - map_parts(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) end -function FESpaces.numeric_loop_vector!(b,a::MultiField.BlockSparseMatrixAssembler,vecdata::AbstractPData) +function FESpaces.numeric_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) rows = get_rows(a) - map_parts(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) end -function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::MultiField.BlockSparseMatrixAssembler,data::AbstractPData) +function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) rows = get_rows(a) cols = get_cols(a) - map_parts(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end -function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::MultiField.BlockSparseMatrixAssembler,data::AbstractPData) +function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) rows = get_rows(a) cols = get_cols(a) - map_parts(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end #! The following is horrible (see dicussion in PR) but necessary for the moment. 
We will be @@ -518,21 +519,21 @@ function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:Fully end function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAssembledRows}) - parts = get_part_ids(local_views(a)) + parts = get_part_ids(local_views(a)) rdofs = a.rows # dof ids of the test space cdofs = a.cols # dof ids of the trial space ngrdofs = length(rdofs) ngcdofs = length(cdofs) - nordofs = map_parts(num_oids,rdofs.partition) - nocdofs = map_parts(num_oids,cdofs.partition) - first_grdof = map_parts(first_gdof_from_ids,rdofs.partition) - first_gcdof = map_parts(first_gdof_from_ids,cdofs.partition) + nordofs = map(num_oids,rdofs.partition) + nocdofs = map(num_oids,cdofs.partition) + first_grdof = map(first_gdof_from_ids,rdofs.partition) + first_gcdof = map(first_gdof_from_ids,cdofs.partition) cneigs_snd = cdofs.exchanger.parts_snd cneigs_rcv = cdofs.exchanger.parts_rcv - hcol_to_gid = map_parts(part -> part.lid_to_gid[part.hid_to_lid], cdofs.partition) - hcol_to_part = map_parts(part -> part.lid_to_part[part.hid_to_lid], cdofs.partition) + hcol_to_gid = map(part -> part.lid_to_gid[part.hid_to_lid], cdofs.partition) + hcol_to_part = map(part -> part.lid_to_part[part.hid_to_lid], cdofs.partition) rows = PRange( parts, @@ -560,39 +561,7 @@ function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAss b = change_axes(a,(rows,cols)) - values = map_parts(Algebra.create_from_nz,local_views(b)) + values = map(Algebra.create_from_nz,local_views(b)) exchanger = empty_exchanger(parts) return PSparseMatrix(values,rows,cols,exchanger) end - -""" -function Algebra.nz_allocation(a::ArrayBlock{<:DistributedCounterCOO}) - array = map(Algebra.nz_allocation,a.array) - match_block_indexes!(array) - return ArrayBlock(array,a.touched) -end - -function match_block_indexes!(allocators::Vector{<:DistributedAllocationCOO}) - return allocators -end - -function match_block_indexes!(allocators::Matrix{<:DistributedAllocationCOO}) - s = size(allocators) - - # Get an AbstractPData containing in each part the the matrix of local allocators - parts = get_part_ids(local_views(first(allocators))) - allocs = map_parts(parts) do p - idx = CartesianIndices(s) - allocs = map(idx) do I - get_part(local_views(a[I],rows[I[1]],cols[I[2]]),p) - end - return allocs - end - - # Accumulate the index sets for each - map_parts() - for block_row in 1:s[1] - - end -end -""" \ No newline at end of file From 265a9d07fe79bc7076c29c4267f7245989139768 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 14 Aug 2023 10:07:27 +1000 Subject: [PATCH 15/56] More fixes --- src/FESpaces.jl | 2 +- src/MultiField.jl | 68 ++++++++++--------- ...jl => BlockSparseMatrixAssemblersTests.jl} | 15 +++- 3 files changed, 51 insertions(+), 34 deletions(-) rename test/{BlockMatrixAssemblersTests.jl => BlockSparseMatrixAssemblersTests.jl} (92%) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index bb740342..9f52a492 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -489,7 +489,7 @@ function _find_vector_type(spaces,gids) # TODO Now the user can select the local vector type but not the global one # new kw-arg global_vector_type ? 
# we use PVector for the moment - local_vector_type = get_vector_type(PartitonedArrays.getany(spaces)) + local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) if local_vector_type <: BlockVector T = eltype(local_vector_type) diff --git a/src/MultiField.jl b/src/MultiField.jl index 66ac5229..60bcc966 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -37,11 +37,7 @@ struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace vector_type::Type{D} function DistributedMultiFieldFESpace( field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace}, -<<<<<<< HEAD - part_fe_space::AbstractPData{<:MultiFieldFESpace{MS}}, -======= - part_fe_space::AbstractArray{<:MultiFieldFESpace}, ->>>>>>> 38b02ee2811f5a28d66359dd085b826041f9ee07 + part_fe_space::AbstractArray{<:MultiFieldFESpace{MS}}, gids::PRange, vector_type::Type{D}) where {D,MS} A = typeof(field_fe_space) @@ -425,52 +421,59 @@ end function FESpaces.SparseMatrixAssembler( trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}, - test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}, + test ::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}, par_strategy=SubAssembledRows()) - Tv = get_vector_type(get_part(local_views(first(trial)))) + Tv = get_vector_type(PartitionedArrays.getany(local_views(first(trial)))) T = eltype(Tv) Tm = SparseMatrixCSC{T,Int} SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy) end -function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} - assems = a.block_assemblers - parts = get_part_ids(local_views(first(assems))) - map(parts) do p - idx = CartesianIndices(axes(assems)) - block_assems = map(idx) do I - get_part(local_views(assems[I]),p) +# Array of PArrays -> PArray of Arrays +function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + PartitionedArrays.getany(aj) end - return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assems) end end -function local_views(a::MatrixBlock,rows,cols) - parts = get_part_ids(local_views(first(a.array))) - map(parts) do p - idx = CartesianIndices(axes(a)) - array = map(idx) do I - get_part(local_views(a[I],rows[I[1]],cols[I[2]]),p) +function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + aj.items[i] end - ArrayBlock(array,a.touched) end end +function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} + assems = a.block_assemblers + array = to_parray_of_arrays(map(local_views,assems)) + return map(MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P},array) +end + +function local_views(a::MatrixBlock,rows,cols) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I[1]],cols[I[2]]) + end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) +end + function local_views(a::VectorBlock,rows) - parts = get_part_ids(local_views(first(a.array))) - map(parts) do p - idx = CartesianIndices(axes(a)) - array = map(idx) do I - get_part(local_views(a[I],rows[I]),p) - end - ArrayBlock(array,a.touched) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I]) end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) end function local_views(a::ArrayBlockView,axes...) array = local_views(a.array,axes...) 
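# Aside: to_parray_of_arrays (added above in this patch) transposes an
# Array of part-wise arrays into a part-wise array of Arrays. A runnable
# debug-mode sketch with toy data (the values are hypothetical):
#
#   parts = with_debug() do distribute
#     distribute(LinearIndices((2,)))
#   end
#   a = [map(p -> 10p,parts), map(p -> 20p,parts)]  # 2 blocks × 2 parts
#   b = to_parray_of_arrays(a)                      # part p holds [10p,20p]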
map(array) do array - MultiField.ArrayBlockView(array,a.block_map) + ArrayBlockView(array,a.block_map) end end @@ -514,10 +517,12 @@ end #! but requires a little bit of refactoring of the assembly code. Postponed until GridapDistributed v0.3. function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) - array = map(_fa_create_from_nz_temporary_fix,a.array) + #array = map(_fa_create_from_nz_temporary_fix,a.array) + array = map(Algebra.create_from_nz,a.array) return mortar(array) end +""" function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAssembledRows}) parts = get_part_ids(local_views(a)) @@ -565,3 +570,4 @@ function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAss exchanger = empty_exchanger(parts) return PSparseMatrix(values,rows,cols,exchanger) end +""" \ No newline at end of file diff --git a/test/BlockMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl similarity index 92% rename from test/BlockMatrixAssemblersTests.jl rename to test/BlockSparseMatrixAssemblersTests.jl index d4bb7b7a..228951d1 100644 --- a/test/BlockMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -1,3 +1,5 @@ +module BlockSparseMatrixAssemblersTests + using Test, LinearAlgebra, BlockArrays, SparseArrays using Gridap @@ -6,11 +8,14 @@ using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField using GridapDistributed using PartitionedArrays -parts = get_part_ids(SequentialBackend(),(2,2)) +nparts = (2,2) +parts = with_debug() do distribute + distribute(LinearIndices((prod(nparts),))) +end sol(x) = sum(x) -model = CartesianDiscreteModel(parts,(0.0,1.0,0.0,1.0),(12,12)) +model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(12,12)) Ω = Triangulation(model) reffe = LagrangianRefFE(Float64,QUAD,1) @@ -106,6 +111,11 @@ block_trials = map(range -> get_block_fespace(X.field_fe_space,range),block_rang assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) +local_views(assem_blocks) + +ab = assem_blocks.block_assemblers +map(local_views,ab) + A1_blocks = assemble_matrix(assem_blocks,bmatdata); b1_blocks = assemble_vector(assem_blocks,bvecdata); @@ -131,3 +141,4 @@ A11 = A1_blocks.blocks[1,1] A12 = A1_blocks.blocks[1,2] A22 = A1_blocks.blocks[2,2] +end \ No newline at end of file From 8f95c71f933ec73e526a44b5bda3104b3d18e8e2 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 14 Aug 2023 15:43:54 +1000 Subject: [PATCH 16/56] Started implementing PRange merging when assembling --- src/Algebra.jl | 208 +++++++++++++---------- src/MultiField.jl | 98 +++++------ test/BlockSparseMatrixAssemblersTests.jl | 5 - 3 files changed, 157 insertions(+), 154 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 5797bce2..2a0e8905 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -321,7 +321,7 @@ function Algebra.nz_allocation(a::DistributedCounterCOO) DistributedAllocationCOO(a.par_strategy,allocs,a.test_dofs_gids_prange,a.trial_dofs_gids_prange) end -struct DistributedAllocationCOO{A,B,C,D} <:GridapType +struct DistributedAllocationCOO{A,B,C,D} <: GridapType par_strategy::A allocs::B test_dofs_gids_prange::C @@ -341,10 +341,10 @@ end function change_axes(a::DistributedAllocationCOO{A,B,<:PRange,<:PRange}, axes::Tuple{<:PRange,<:PRange}) where {A,B} - local_axes=map(partition(axes[1]),partition(axes[2])) do rows,cols + local_axes = map(partition(axes[1]),partition(axes[2])) do rows,cols (Base.OneTo(local_length(rows)), Base.OneTo(local_length(cols))) end - 
allocs=map(change_axes,a.allocs,local_axes) + allocs = map(change_axes,a.allocs,local_axes) DistributedAllocationCOO(a.par_strategy,allocs,axes[1],axes[2]) end @@ -358,21 +358,31 @@ function local_views(a::DistributedAllocationCOO,test_dofs_gids_prange,trial_dof a.allocs end +function get_allocations(a::DistributedAllocationCOO) + I,J,V = map(local_views(a)) do alloc + alloc.I, alloc.J, alloc.V + end |> tuple_of_arrays + return I,J,V +end + +get_test_gids(a::DistributedAllocationCOO) = a.test_dofs_gids_prange +get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange + function first_gdof_from_ids(ids) - lid_to_gid=local_to_global(ids) - owner_to_lid=own_to_local(ids) - own_length(ids)>0 ? Int(lid_to_gid[first(owner_to_lid)]) : 1 + lid_to_gid = local_to_global(ids) + owner_to_lid = own_to_local(ids) + return (own_length(ids) > 0) ? Int(lid_to_gid[first(owner_to_lid)]) : 1 end function find_gid_and_owner(ighost_to_jghost,jindices) - jghost_to_local=ghost_to_local(jindices) - jlocal_to_global=local_to_global(jindices) - jlocal_to_owner=local_to_owner(jindices) + jghost_to_local = ghost_to_local(jindices) + jlocal_to_global = local_to_global(jindices) + jlocal_to_owner = local_to_owner(jindices) ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) ighost_to_global = jlocal_to_global[ighost_to_jlocal] ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] - ighost_to_global, ighost_to_owner + return ighost_to_global, ighost_to_owner end function Algebra.create_from_nz(a::PSparseMatrix) @@ -385,15 +395,17 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows}) f(x) = nothing A, = _fa_create_from_nz_with_callback(f,a) - A + return A end # The given ids are assumed to be a sub-set of the lids function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) + glo_to_loc = global_to_local(a) + loc_to_gho = local_to_ghost(a) + + # First pass: Allocate i = 0 ghost_lids_touched = fill(false,ghost_length(a)) - glo_to_loc=global_to_local(a) - loc_to_gho=local_to_ghost(a) for gid in gids lid = glo_to_loc[gid] ghost_lid = loc_to_gho[lid] @@ -403,18 +415,21 @@ function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integ end end gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) - i = 0 - ghost_lids_touched .= false + + # Second pass: fill + i = 1 + fill!(ghost_lids_touched,false) for gid in gids lid = glo_to_loc[gid] ghost_lid = loc_to_gho[lid] if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] ghost_lids_touched[ghost_lid] = true - i += 1 gids_ghost_lid_to_ghost_lid[i] = ghost_lid + i += 1 end end - gids_ghost_lid_to_ghost_lid + + return gids_ghost_lid_to_ghost_lid end # Find the neighbours of partition1 trying @@ -422,74 +437,59 @@ end function _find_neighbours!(partition1, partition2) partition2_snd, partition2_rcv = assembly_neighbors(partition2) partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) - assembly_neighbors(partition1; neighbors=partition2_graph) + return assembly_neighbors(partition1; neighbors=partition2_graph) end function _fa_create_from_nz_with_callback(callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngcdofs = length(trial_dofs_gids_prange) - nocdofs = 
map(own_length,trial_dofs_gids_partition) - - rows = _setup_prange_rows_without_ghosts(test_dofs_gids_prange) + I,J,V = get_allocations(a) + test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) + rows = _setup_prange(test_dofs_gids_prange,I;ghost=false,ax=:rows) b = callback(rows) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange) + to_global_indices!(J,trial_dofs_gids_prange) # Create the range for cols - cols = _setup_prange(trial_dofs_gids_prange,J) - + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows) + to_local_indices!(J,cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress local portions - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Finally build the matrix A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + return A, b end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) f(x) = nothing A, = _sa_create_from_nz_with_callback(f,f,a) - A + return A end function _sa_create_from_nz_with_callback(callback,async_callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngrdofs = length(test_dofs_gids_prange) - ngcdofs = length(test_dofs_gids_prange) + I,J,V = get_allocations(a) + test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange) + to_global_indices!(J,trial_dofs_gids_prange) # Create the Prange for the rows - rows = _setup_prange(test_dofs_gids_prange,I) + rows = _setup_prange(test_dofs_gids_prange,I;ax=:rows) # Move values to the owner part # since we have integrated only over owned cells @@ -504,20 +504,20 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) wait(t) # Create the Prange for the cols - cols = _setup_prange(trial_dofs_gids_prange,J) + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols) # Overlap rhs communications with CSC compression t2 = async_callback(b) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows) + to_local_indices!(J,cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress the local matrices - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Wait the transfer to finish if t2 !== nothing @@ -526,8 +526,7 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) # Finally build the matrix A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + return A, b end struct PVectorBuilder{T,B} @@ -588,50 +587,71 @@ function local_views(a::PVectorAllocationTrackOnlyValues,rows) a.values end +# to_global! & to_local! 
analogs, for dispatching + +function to_local_indices!(I,ids::PRange) + map(to_local!,I,partition(ids)) +end + +function to_local_indices!(I,ids::AbstractVector{<:PRange}) + map(to_local_indices!,I,ids) +end + +function to_global_indices!(I,ids::PRange) + map(to_global!,I,partition(ids)) +end + +function to_global_indices!(I,ids::AbstractVector{<:PRange}) + map(to_global_indices!,I,ids) +end + +# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange +# In the former case, gids is a vector of global test dof identifiers, while in the +# latter, a vector of global trial dof identifiers +function _setup_prange(dofs_gids_prange,gids;ghost=true,kwargs...) + if ghost + _setup_prange_with_ghosts(dofs_gids_prange,gids) + else + _setup_prange_without_ghosts(dofs_gids_prange) + end +end + # Create PRange for the rows of the linear system # without local ghost dofs as per required in the # FullyAssembledRows() parallel assembly strategy -function _setup_prange_rows_without_ghosts(test_dofs_gids_prange) - ngdofs = length(test_dofs_gids_prange) - test_dofs_gids_partition = partition(test_dofs_gids_prange) - nodofs = map(own_length,test_dofs_gids_partition) - rindices=map(test_dofs_gids_partition) do dofs_indices +function _setup_prange_without_ghosts(dofs_gids_prange) + ngdofs = length(dofs_gids_prange) + indices = map(partition(dofs_gids_prange)) do dofs_indices owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,Int64[],Int32[]) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,Int64[],Int32[]) OwnAndGhostIndices(own_indices,ghost_indices) end - PRange(rindices) + return PRange(indices) end -# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange -# In the former case, gids is a vector of global test dof identifiers, while in the -# latter, a vector of global trial dof identifiers -function _setup_prange(dofs_gids_prange,gids) +function _setup_prange_with_ghosts(dofs_gids_prange,gids) ngdofs = length(dofs_gids_prange) dofs_gids_partition = partition(dofs_gids_prange) - gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) - _setup_prange_impl_(ngdofs,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) -end -function _setup_prange_impl_(ngdofs,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) + gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) gids_ghost_to_global, gids_ghost_to_owner = map( find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays - indices=map(dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner + indices = map(dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) OwnAndGhostIndices(own_indices,ghost_indices) end _find_neighbours!(indices, dofs_gids_partition) - PRange(indices) + return PRange(indices) end function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) - rows = _setup_prange_rows_without_ghosts(a.test_dofs_gids_prange) + 
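# The dispatch pair above lets the same driver code handle the monolithic
# case (a single PRange) and the block case (a Vector of PRanges). For
# reference, a sketch of the renumbering round-trip performed by the
# sub-assembled driver, using the helpers just defined (names as in this
# patch):
#
#   to_global_indices!(I,test_dofs_gids_prange)  # local FE ids -> global
#   rows = _setup_prange(test_dofs_gids_prange,I;ax=:rows)
#   to_local_indices!(I,rows)                    # global -> local row ids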
rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) _rhs_callback(a,rows) end @@ -764,8 +784,8 @@ function Arrays.nz_allocation(a::DistributedCounterCOO{<:SubAssembledRows}, A = nz_allocation(a) dofs = b.test_dofs_gids_prange values = map(nz_allocation,b.counters) - B=PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) - A,B + B = PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) + return A,B end function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) @@ -777,7 +797,7 @@ function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) allocations=map(values,touched) do values,touched ArrayAllocationTrackTouchedAndValues(touched,values) end - PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) + return PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) end function local_views(a::PVectorAllocationTrackTouchedAndValues) @@ -795,12 +815,12 @@ function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) loc_to_gho = local_to_ghost(indices) n_I_ghost_lids = count((x)->loc_to_gho[x]!=0,dofs_lids_touched) I_ghost_lids = Vector{Int32}(undef,n_I_ghost_lids) - cur=1 + cur = 1 for lid in dofs_lids_touched dof_lid=loc_to_gho[lid] - if dof_lid!=0 - I_ghost_lids[cur]=dof_lid - cur=cur+1 + if dof_lid != 0 + I_ghost_lids[cur] = dof_lid + cur = cur+1 end end I_ghost_lids @@ -813,9 +833,9 @@ function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) b = _rhs_callback(a,rows) t2 = assemble!(b) - # Wait the transfer to finish - if t2 !== nothing - wait(t2) - end - b + # Wait the transfer to finish + if t2 !== nothing + wait(t2) + end + b end diff --git a/src/MultiField.jl b/src/MultiField.jl index 60bcc966..8fd32851 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -401,11 +401,10 @@ function FESpaces.SparseMatrixAssembler( test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, par_strategy=SubAssembledRows()) where {NB,SB,P} - # Build block spaces #! TODO: Eliminate this, build the PRANGES directly + # Build block spaces function get_block_fespace(spaces,range) (length(range) == 1) ? spaces[range[1]] : MultiFieldFESpace(spaces[range]) end - NV = length(trial.field_fe_space) block_ranges = MultiField.get_block_ranges(NB,SB,P) block_tests = map(range -> get_block_fespace(test.field_fe_space,range),block_ranges) block_trials = map(range -> get_block_fespace(trial.field_fe_space,range),block_ranges) @@ -416,6 +415,7 @@ function FESpaces.SparseMatrixAssembler( return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy) end + NV = length(trial.field_fe_space) return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers) end @@ -477,6 +477,8 @@ function local_views(a::ArrayBlockView,axes...) end end +# SparseMatrixAssembler API + #! The following could be avoided if DistributedBlockSparseMatrixAssembler <: DistributedSparseMatrixAssembler function FESpaces.symbolic_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) rows = get_rows(a) @@ -512,62 +514,48 @@ function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseM map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end -#! The following is horrible (see dicussion in PR) but necessary for the moment. We will be -#! bringing potentially too many ghosts from other procs. This will be dealt with in teh future, -#! but requires a little bit of refactoring of the assembly code. Postponed until GridapDistributed v0.3. 
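A note on the unzip idiom that the assembly section below leans on: get_allocations pipes a map over the parts, whose body returns a tuple, into PartitionedArrays' tuple_of_arrays to obtain one part-wise array per COO field. A minimal debug-mode sketch of the pattern (toy triplets, not from the patch):

using PartitionedArrays

parts = with_debug() do distribute
  distribute(LinearIndices((2,)))
end
I,J,V = map(parts) do p
  [p], [p+1], [Float64(p)]   # stand-ins for alloc.I, alloc.J, alloc.V
end |> tuple_of_arrays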
+# Assembly -function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) - #array = map(_fa_create_from_nz_temporary_fix,a.array) - array = map(Algebra.create_from_nz,a.array) - return mortar(array) +function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) + tuple_of_array_of_parrays = map(get_allocations,a.array) |> tuple_of_arrays + #tuple_of_parray_of_arrays = map(to_parray_of_arrays,tuple_of_array_of_parrays) + return tuple_of_array_of_parrays end -""" -function _fa_create_from_nz_temporary_fix(a::DistributedAllocationCOO{<:FullyAssembledRows}) - parts = get_part_ids(local_views(a)) - - rdofs = a.rows # dof ids of the test space - cdofs = a.cols # dof ids of the trial space - ngrdofs = length(rdofs) - ngcdofs = length(cdofs) - nordofs = map(num_oids,rdofs.partition) - nocdofs = map(num_oids,cdofs.partition) - first_grdof = map(first_gdof_from_ids,rdofs.partition) - first_gcdof = map(first_gdof_from_ids,cdofs.partition) - cneigs_snd = cdofs.exchanger.parts_snd - cneigs_rcv = cdofs.exchanger.parts_rcv - - hcol_to_gid = map(part -> part.lid_to_gid[part.hid_to_lid], cdofs.partition) - hcol_to_part = map(part -> part.lid_to_part[part.hid_to_lid], cdofs.partition) - - rows = PRange( - parts, - ngrdofs, - nordofs, - first_grdof) - - cols = PRange( - parts, - ngcdofs, - nocdofs, - first_gcdof, - hcol_to_gid, - hcol_to_part, - cneigs_snd, - cneigs_rcv) - - I,J,C = map_parts(a.allocs) do alloc - alloc.I, alloc.J, alloc.V +function get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) + return map(get_test_gids,a.array[:,1]) +end + +function get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) + return map(get_trial_gids,a.array[1,:]) +end + +function change_axes(a::ArrayBlock{<:DistributedAllocationCOO},axes) + array = map(ai -> change_axes(ai,axes),a.array) + return ArrayBlock(array,a.touched) +end + +function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange},gids::AbstractMatrix;ghost=true,ax=:rows) + @check ax ∈ (:rows,:cols) + block_ids = LinearIndices(dofs_gids_prange) + gids_ax_slice = map(block_ids) do id + gids_ax_slice = (ax == :rows) ? 
gids[id,:] : gids[:,id] + if ghost + gids_ax_slice = map(x -> union(x...), to_parray_of_arrays(gids_ax_slice)) + end + return gids_ax_slice end - to_gids!(I,rdofs) - to_gids!(J,cdofs) - to_lids!(I,rows) - to_lids!(J,cols) + return map((p,g) -> _setup_prange(p,g;ghost=ghost), dofs_gids_prange, gids_ax_slice) +end - b = change_axes(a,(rows,cols)) - - values = map(Algebra.create_from_nz,local_views(b)) - exchanger = empty_exchanger(parts) - return PSparseMatrix(values,rows,cols,exchanger) +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) + f(x) = nothing + A, = _fa_create_from_nz_with_callback(f,a) + return A +end + +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) + f(x) = nothing + A, = _sa_create_from_nz_with_callback(f,f,a) + return A end -""" \ No newline at end of file diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 228951d1..9cd8afed 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -111,11 +111,6 @@ block_trials = map(range -> get_block_fespace(X.field_fe_space,range),block_rang assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) -local_views(assem_blocks) - -ab = assem_blocks.block_assemblers -map(local_views,ab) - A1_blocks = assemble_matrix(assem_blocks,bmatdata); b1_blocks = assemble_vector(assem_blocks,bvecdata); From f756f41264912caac416bd084ba292036077a777 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 15 Aug 2023 10:36:34 +1000 Subject: [PATCH 17/56] Finished optimizing assembly --- src/Algebra.jl | 62 +++++++++++++++++------- src/GridapDistributed.jl | 2 +- src/MultiField.jl | 19 ++++++-- test/BlockSparseMatrixAssemblersTests.jl | 38 +++++++++------ 4 files changed, 82 insertions(+), 39 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 2a0e8905..304300c2 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -451,15 +451,15 @@ function _fa_create_from_nz_with_callback(callback,a) b = callback(rows) # convert I and J to global dof ids - to_global_indices!(I,test_dofs_gids_prange) - to_global_indices!(J,trial_dofs_gids_prange) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the range for cols cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols) # Convert again I,J to local numeration - to_local_indices!(I,rows) - to_local_indices!(J,cols) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets asys = change_axes(a,(rows,cols)) @@ -468,7 +468,7 @@ function _fa_create_from_nz_with_callback(callback,a) values = map(create_from_nz,local_views(asys)) # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) + A = _setup_matrix(values,rows,cols) return A, b end @@ -485,8 +485,8 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) trial_dofs_gids_prange = get_trial_gids(a) # convert I and J to global dof ids - to_global_indices!(I,test_dofs_gids_prange) - to_global_indices!(J,trial_dofs_gids_prange) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the Prange for the rows rows = _setup_prange(test_dofs_gids_prange,I;ax=:rows) @@ -510,8 +510,8 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) t2 = async_callback(b) # Convert again I,J to local numeration - 
to_local_indices!(I,rows) - to_local_indices!(J,cols) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets asys = change_axes(a,(rows,cols)) @@ -525,7 +525,7 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) end # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) + A = _setup_matrix(values,rows,cols) return A, b end @@ -587,22 +587,48 @@ function local_views(a::PVectorAllocationTrackOnlyValues,rows) a.values end -# to_global! & to_local! analogs, for dispatching +# to_global! & to_local! analogs, for dispatching in block assembly -function to_local_indices!(I,ids::PRange) +function to_local_indices!(I,ids::PRange;kwargs...) map(to_local!,I,partition(ids)) end -function to_local_indices!(I,ids::AbstractVector{<:PRange}) - map(to_local_indices!,I,ids) +function to_global_indices!(I,ids::PRange;kwargs...) + map(to_global!,I,partition(ids)) end -function to_global_indices!(I,ids::PRange) - map(to_global!,I,partition(ids)) +for f in [:to_local_indices!, :to_global_indices!] + @eval begin + function $f(I::Vector,ids::AbstractVector{<:PRange};kwargs...) + map($f,I,ids) + end + + function $f(I::Matrix,ids::AbstractVector{<:PRange};ax=:rows) + @check ax ∈ [:rows,:cols] + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if ax == :rows + $f(I[i,j],ids[i]) + else + $f(I[i,j],ids[j]) + end + end + end + end end -function to_global_indices!(I,ids::AbstractVector{<:PRange}) - map(to_global_indices!,I,ids) +function _setup_matrix(values,rows::PRange,cols::PRange) + return PSparseMatrix(values,partition(rows),partition(cols)) +end + +function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) + block_ids = CartesianIndices((length(rows),length(cols))) + block_mats = map(block_ids) do I + block_values = map(v -> blocks(v)[I],values) + return _setup_matrix(block_values,rows[I[1]],cols[I[2]]) + end + return mortar(block_mats) end # dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index aab265b6..6df3dd25 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -25,7 +25,7 @@ using FillArrays using BlockArrays import Gridap.TensorValues: inner, outer, double_contraction, symmetric_part -import LinearAlgebra: det, tr, cross, dot, ⋅ +import LinearAlgebra: det, tr, cross, dot, ⋅, diag import Base: inv, abs, abs2, *, +, -, /, adjoint, transpose, real, imag, conj, getproperty, propertynames import Gridap.Fields: grad2curl import Gridap.ODEs.ODETools: ∂t, ∂tt diff --git a/src/MultiField.jl b/src/MultiField.jl index 8fd32851..f8383242 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -523,21 +523,32 @@ function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) end function get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) - return map(get_test_gids,a.array[:,1]) + return map(get_test_gids,diag(a.array)) end function get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) - return map(get_trial_gids,a.array[1,:]) + return map(get_trial_gids,diag(a.array)) end -function change_axes(a::ArrayBlock{<:DistributedAllocationCOO},axes) - array = map(ai -> change_axes(ai,axes),a.array) +function change_axes(a::MatrixBlock{<:DistributedAllocationCOO},axes::Tuple{<:Vector,<:Vector}) + block_ids = CartesianIndices(a.array) + rows, cols = axes + + array = map(block_ids) do I + 
change_axes(a[I],(rows[I[1]],cols[I[2]])) + end return ArrayBlock(array,a.touched) end +function local_views(a::MatrixBlock{<:DistributedAllocationCOO}) + array = map(local_views,a.array) |> to_parray_of_arrays + return map(ai -> ArrayBlock(ai,a.touched),array) +end + function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange},gids::AbstractMatrix;ghost=true,ax=:rows) @check ax ∈ (:rows,:cols) block_ids = LinearIndices(dofs_gids_prange) + gids_ax_slice = map(block_ids) do id gids_ax_slice = (ax == :rows) ? gids[id,:] : gids[:,id] if ghost diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 9cd8afed..71e61ed9 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -8,14 +8,14 @@ using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField using GridapDistributed using PartitionedArrays -nparts = (2,2) +nparts = (2,1) parts = with_debug() do distribute distribute(LinearIndices((prod(nparts),))) end sol(x) = sum(x) -model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(12,12)) +model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(4,1)) Ω = Triangulation(model) reffe = LagrangianRefFE(Float64,QUAD,1) @@ -23,14 +23,16 @@ V = FESpace(Ω, reffe) U = TrialFESpace(sol,V) dΩ = Measure(Ω, 4) -biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3)*dΩ # + v3⋅u1 - v1⋅u3)*dΩ -liform((v1,v2,v3)) = ∫(v1 + v2 - v3)*dΩ +#biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3)*dΩ # + v3⋅u1 - v1⋅u3)*dΩ +#liform((v1,v2,v3)) = ∫(v1 + v2 - v3)*dΩ +biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ +liform((v1,v2)) = ∫(v1 + v2)*dΩ ############################################################################################ # Normal assembly -Y = MultiFieldFESpace([V,V,V]) -X = MultiFieldFESpace([U,U,U]) +Y = MultiFieldFESpace([V,V]) +X = MultiFieldFESpace([U,U]) u = get_trial_fe_basis(X) v = get_fe_basis(Y) @@ -51,10 +53,10 @@ A11 = assemble_matrix((u1,v1)->∫(∇(u1)⋅∇(v1))*dΩ,assem11,U,V) # Block MultiFieldStyle #mfs = BlockMultiFieldStyle() -mfs = BlockMultiFieldStyle(2,(1,2)) +mfs = BlockMultiFieldStyle()#2,(1,2)) -Yb = MultiFieldFESpace([V,V,V];style=mfs) -Xb = MultiFieldFESpace([U,U,U];style=mfs) +Yb = MultiFieldFESpace([V,V];style=mfs) +Xb = MultiFieldFESpace([U,U];style=mfs) ub = get_trial_fe_basis(Xb) vb = get_fe_basis(Yb) @@ -99,18 +101,22 @@ function test_axes(c::BlockVector,a::BlockMatrix,b::BlockVector) return res end -function get_block_fespace(spaces,range) - (length(range) == 1) ? spaces[range[1]] : MultiFieldFESpace(spaces[range]) -end - -block_ranges = Gridap.MultiField.get_block_ranges(2,(1,2),(1,2,3)) -block_trials = map(range -> get_block_fespace(X.field_fe_space,range),block_ranges) - #! TODO: Does not work if there are empty blocks due to PRange checks when multiplying. #! Maybe we should change to MatrixBlocks? 
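Both parallel strategies appear in this PR: SubAssembledRows (the default) integrates owned cells only and then communicates off-processor contributions to their owners, whereas FullyAssembledRows assumes each part can assemble its owned rows entirely from local data, so the row range is built without ghosts. A sketch with the spaces already defined in this test (the SubAssembledRows lines are an assumption; the hunk below only exercises FullyAssembledRows):

assem_fa = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows())
A_fa = assemble_matrix(assem_fa,bmatdata)

assem_sa = SparseMatrixAssembler(Xb,Yb,SubAssembledRows())
A_sa = assemble_matrix(assem_sa,bmatdata)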
 assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows())

+rows = get_rows(assem_blocks)
+cols = get_cols(assem_blocks)
+mat_builder = get_matrix_builder(assem_blocks)
+
+m1 = Gridap.FESpaces.nz_counter(mat_builder,(rows,cols))
+Gridap.FESpaces.symbolic_loop_matrix!(m1,assem_blocks,bmatdata)
+m2 = Gridap.FESpaces.nz_allocation(m1)
+Gridap.FESpaces.numeric_loop_matrix!(m2,assem_blocks,bmatdata)
+
+m3 = Gridap.FESpaces.create_from_nz(m2)
+
 A1_blocks = assemble_matrix(assem_blocks,bmatdata);
 b1_blocks = assemble_vector(assem_blocks,bvecdata);

From c5bd6805c0d282dd24648769ce1ce0e294301027 Mon Sep 17 00:00:00 2001
From: JordiManyer
Date: Tue, 15 Aug 2023 12:39:45 +1000
Subject: [PATCH 18/56] Updated tests

---
 docs/Manifest.toml                       | 524 -----------------------
 src/MultiField.jl                        |  14 +
 test/BlockSparseMatrixAssemblersTests.jl |  59 +--
 3 files changed, 30 insertions(+), 567 deletions(-)
 delete mode 100644 docs/Manifest.toml

diff --git a/docs/Manifest.toml b/docs/Manifest.toml
deleted file mode 100644
index f25a7a58..00000000
--- a/docs/Manifest.toml
+++ /dev/null
@@ -1,524 +0,0 @@
diff --git a/src/MultiField.jl b/src/MultiField.jl
index f8383242..ca429612 100644
--- a/src/MultiField.jl
+++ b/src/MultiField.jl
@@ -71,6 +71,20 @@ function MultiField.restrict_to_field(
   PVector(values,partition(gids))
 end

+function MultiField.restrict_to_field(
+  f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},free_values::BlockVector,field::Integer)
+
+  # BlockVector{PVector} -> PVector{BlockVector}
+  fv1 = map(partition,blocks(free_values)) |> to_parray_of_arrays
+  fv2 = map(mortar,fv1)
+
+  values = map(f.part_fe_space,fv2) do u,x
+    restrict_to_field(u,x,field)
+  end
+  gids = f.field_fe_space[field].gids
+  PVector(values,partition(gids))
+end
+
 #function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle})
 #  return mortar(map(zero_free_values,f.field_fe_space))
 #end

diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl
index 71e61ed9..47e5d4e6 100644
--- a/test/BlockSparseMatrixAssemblersTests.jl
+++ b/test/BlockSparseMatrixAssemblersTests.jl
@@ -8,14 +8,14 @@ using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField
 using GridapDistributed
 using PartitionedArrays

-nparts = (2,1)
+nparts = (2,2)
 parts = with_debug() do distribute
   distribute(LinearIndices((prod(nparts),)))
 end

 sol(x) = sum(x)

-model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(4,1))
+model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(4,4))
 Ω = Triangulation(model)

 reffe = LagrangianRefFE(Float64,QUAD,1)
@@ -68,17 +68,6 @@ bvecdata = collect_cell_vector(Yb,liform(vb))
 ############################################################################################
 # Block Assembly

-function same_solution(x1::PVector,x2::BlockVector,X,Xi,dΩ)
-  u1 = [FEFunction(X,x1)...]
-  u2 = map(i->FEFunction(Xi[i],x2[Block(i)]),1:blocklength(x2))
-
-  err = map(u1,u2) do u1,u2
-    eh = u1-u2
-    return sum(∫(eh⋅eh)dΩ)
-  end
-  return err
-end
-
 function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector)
   o = one(eltype(A))
   for i in blockaxes(A,2)
@@ -89,48 +78,32 @@ end
   end
 end

-function test_axes(c::BlockVector,a::BlockMatrix,b::BlockVector)
-  res = Matrix(undef,blocksize(a)...)
-  for i in blockaxes(a,1)
-    for j in blockaxes(a,2)
-      res[i.n[1],j.n[1]] = Tuple([oids_are_equal(c[i].rows,a[i,j].rows),
-                                  oids_are_equal(a[i,j].cols,b[j].rows),
-                                  hids_are_equal(a[i,j].cols,b[j].rows)])
-    end
+function is_same_vector(x::BlockVector,y::PVector,Ub,U)
+  y_fespace = GridapDistributed.change_ghost(y,U.gids)
+  x_fespace = mortar(map((xi,Ui) -> GridapDistributed.change_ghost(xi,Ui.gids),blocks(x),Ub.field_fe_space))
+
+  res = map(1:num_fields(Ub)) do i
+    xi = restrict_to_field(Ub,x_fespace,i)
+    yi = restrict_to_field(U,y_fespace,i)
+    xi ≈ yi
   end
-  return res
+  return all(res)
 end

-#! TODO: Does not work if there are empty blocks due to PRange checks when multiplying.
-#! Maybe we should change to MatrixBlocks?
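For reference, a usage sketch of the helper just added: build the same all-ones free-dof vector in both layouts and compare it field by field (hypothetical check, assuming the spaces of this test; pfill comes from PartitionedArrays):

x  = pfill(1.0,partition(X.gids))
xb = mortar(map(U -> pfill(1.0,partition(U.gids)),Xb.field_fe_space))
@test is_same_vector(xb,x,Xb,X)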
- assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) -rows = get_rows(assem_blocks) -cols = get_cols(assem_blocks) -mat_builder = get_matrix_builder(assem_blocks) - -m1 = Gridap.FESpaces.nz_counter(mat_builder,(rows,cols)) -Gridap.FESpaces.symbolic_loop_matrix!(m1,assem_blocks,bmatdata) -m2 = Gridap.FESpaces.nz_allocation(m1) -Gridap.FESpaces.numeric_loop_matrix!(m2,assem_blocks,bmatdata) - -m3 = Gridap.FESpaces.create_from_nz(m2) - A1_blocks = assemble_matrix(assem_blocks,bmatdata); b1_blocks = assemble_vector(assem_blocks,bvecdata); -y1_blocks = mortar(map(Aii->PVector(0.0,Aii.rows),diag(A1_blocks.blocks))); -x1_blocks = mortar(map(Aii->PVector(1.0,Aii.cols),diag(A1_blocks.blocks))); -test_axes(y1_blocks,A1_blocks,x1_blocks) - +y1_blocks = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(A1_blocks)))); +x1_blocks = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(A1_blocks)))); mul!(y1_blocks,A1_blocks,x1_blocks) -y1 = PVector(0.0,A1.rows) -x1 = PVector(1.0,A1.cols) +y1 = pfill(0.0,partition(axes(A1)[1])) +x1 = pfill(1.0,partition(axes(A1)[2])) mul!(y1,A1,x1) -@test all(same_solution(y1,y1_blocks,X,block_trials,dΩ) .< 1e-5) +is_same_vector(y1_blocks,y1,Yb,Y) ############################################################################################ From 7c033b5d6f25b92989a1fe903a99dfc3808ff67e Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 15 Aug 2023 12:40:31 +1000 Subject: [PATCH 19/56] Added docs/manifest.toml to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 91a3e882..4f9205f8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ /dev/ /docs/build/ /docs/site/ +/docs/Manifest.toml /tmp/ *.vtu *.pvtu From dca9a583646b1c7e9be56c60e1518a84cabd8803 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 15 Aug 2023 13:19:35 +1000 Subject: [PATCH 20/56] Fixed AffineFEOperators --- src/Algebra.jl | 38 +++++++++++++++++------- test/BlockSparseMatrixAssemblersTests.jl | 30 +++++++++---------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 304300c2..bae3a221 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -493,7 +493,7 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) # Move values to the owner part # since we have integrated only over owned cells - t = PArrays.assemble_coo!(I,J,V,partition(rows)) + t = _assemble_coo!(I,J,V,rows) # Here we can overlap computations # This is a good place to overlap since @@ -631,6 +631,20 @@ function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) return mortar(block_mats) end +function _assemble_coo!(I,J,V,rows::PRange) + PArrays.assemble_coo!(I,J,V,partition(rows)) +end + +function _assemble_coo!(I,J,V,rows::Vector{<:PRange}) + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) + end +end + +Base.wait(t::Matrix) = map(wait,t) + # dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange # In the former case, gids is a vector of global test dof identifiers, while in the # latter, a vector of global trial dof identifiers @@ -659,8 +673,11 @@ end function _setup_prange_with_ghosts(dofs_gids_prange,gids) ngdofs = length(dofs_gids_prange) dofs_gids_partition = partition(dofs_gids_prange) - gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) + return _setup_prange_impl_(ngdofs,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) 
+end + +function _setup_prange_impl_(ngdofs,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) gids_ghost_to_global, gids_ghost_to_owner = map( find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays @@ -674,7 +691,7 @@ function _setup_prange_with_ghosts(dofs_gids_prange,gids) end _find_neighbours!(indices, dofs_gids_partition) return PRange(indices) -end +end function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) @@ -832,18 +849,19 @@ end function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) test_dofs_prange = a.test_dofs_gids_prange # dof ids of the test space - test_dofs_prange_partition = partition(test_dofs_prange) ngrdofs = length(test_dofs_prange) - + # Find the ghost rows - I_ghost_lids_to_dofs_ghost_lids=map(local_views(a.allocations),test_dofs_prange_partition) do allocation, indices - dofs_lids_touched=findall(allocation.touched) + allocations = local_views(a.allocations) + indices = partition(test_dofs_prange) + I_ghost_lids_to_dofs_ghost_lids = map(allocations, indices) do allocation, indices + dofs_lids_touched = findall(allocation.touched) loc_to_gho = local_to_ghost(indices) n_I_ghost_lids = count((x)->loc_to_gho[x]!=0,dofs_lids_touched) I_ghost_lids = Vector{Int32}(undef,n_I_ghost_lids) cur = 1 for lid in dofs_lids_touched - dof_lid=loc_to_gho[lid] + dof_lid = loc_to_gho[lid] if dof_lid != 0 I_ghost_lids[cur] = dof_lid cur = cur+1 @@ -854,7 +872,7 @@ function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) rows = _setup_prange_impl_(ngrdofs, I_ghost_lids_to_dofs_ghost_lids, - test_dofs_prange_partition) + indices) b = _rhs_callback(a,rows) t2 = assemble!(b) @@ -863,5 +881,5 @@ function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) if t2 !== nothing wait(t2) end - b + return b end diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 47e5d4e6..a3c2875e 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -90,29 +90,29 @@ function is_same_vector(x::BlockVector,y::PVector,Ub,U) return all(res) end -assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) +function is_same_matrix(Ab::BlockMatrix,A::PSparseMatrix,Xb,X) + yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); + xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); + mul!(yb,Ab,xb) -A1_blocks = assemble_matrix(assem_blocks,bmatdata); -b1_blocks = assemble_vector(assem_blocks,bvecdata); + y = pfill(0.0,partition(axes(A)[1])) + x = pfill(1.0,partition(axes(A)[2])) + mul!(y,A,x) -y1_blocks = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(A1_blocks)))); -x1_blocks = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(A1_blocks)))); -mul!(y1_blocks,A1_blocks,x1_blocks) + return is_same_vector(yb,y,Xb,X) +end -y1 = pfill(0.0,partition(axes(A1)[1])) -x1 = pfill(1.0,partition(axes(A1)[2])) -mul!(y1,A1,x1) +assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) -is_same_vector(y1_blocks,y1,Yb,Y) +A1_blocks = assemble_matrix(assem_blocks,bmatdata); +b1_blocks = assemble_vector(assem_blocks,bvecdata); +is_same_matrix(A1_blocks,A1,Xb,X) ############################################################################################ op = AffineFEOperator(biform,liform,X,Y) block_op = AffineFEOperator(biform,liform,Xb,Yb) - - -A11 = A1_blocks.blocks[1,1] 
-A12 = A1_blocks.blocks[1,2] -A22 = A1_blocks.blocks[2,2] +is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) +is_same_matrix(get_matrix(block_op),get_matrix(op),Xb,X) end \ No newline at end of file From ee07718e60630b4100167298654e421526f76e96 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 15 Aug 2023 17:37:58 +1000 Subject: [PATCH 21/56] Tests working for arbitrary block assembly --- test/BlockSparseMatrixAssemblersTests.jl | 105 ++++++++++++----------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index a3c2875e..04e0d9cc 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -8,6 +8,51 @@ using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField using GridapDistributed using PartitionedArrays +function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector) + o = one(eltype(A)) + for i in blockaxes(A,2) + fill!(y[i],0.0) + for j in blockaxes(A,2) + mul!(y[i],A[i,j],x[j],o,o) + end + end +end + +function GridapDistributed.change_ghost( + x::BlockVector, + X::GridapDistributed.DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} + block_ranges = MultiField.get_block_ranges(NB,SB,P) + array = map(block_ranges,blocks(x)) do range, xi + Xi = (length(range) == 1) ? X.field_fe_space[range[1]] : MultiFieldFESpace(X.field_fe_space[range]) + GridapDistributed.change_ghost(xi,Xi.gids) + end + return mortar(array) +end + +function is_same_vector(x::BlockVector,y::PVector,Ub,U) + y_fespace = GridapDistributed.change_ghost(y,U.gids) + x_fespace = GridapDistributed.change_ghost(x,Ub) + + res = map(1:num_fields(Ub)) do i + xi = restrict_to_field(Ub,x_fespace,i) + yi = restrict_to_field(U,y_fespace,i) + xi ≈ yi + end + return all(res) +end + +function is_same_matrix(Ab::BlockMatrix,A::PSparseMatrix,Xb,X) + yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); + xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); + mul!(yb,Ab,xb) + + y = pfill(0.0,partition(axes(A)[1])) + x = pfill(1.0,partition(axes(A)[2])) + mul!(y,A,x) + + return is_same_vector(yb,y,Xb,X) +end + nparts = (2,2) parts = with_debug() do distribute distribute(LinearIndices((prod(nparts),))) @@ -15,7 +60,7 @@ end sol(x) = sum(x) -model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(4,4)) +model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(8,8)) Ω = Triangulation(model) reffe = LagrangianRefFE(Float64,QUAD,1) @@ -23,16 +68,16 @@ V = FESpace(Ω, reffe) U = TrialFESpace(sol,V) dΩ = Measure(Ω, 4) -#biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1 - v3⋅u3)*dΩ # + v3⋅u1 - v1⋅u3)*dΩ -#liform((v1,v2,v3)) = ∫(v1 + v2 - v3)*dΩ -biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ -liform((v1,v2)) = ∫(v1 + v2)*dΩ +biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 + u2⋅v1 + v3⋅u3 + v3⋅u1 + v1⋅u3)*dΩ +liform((v1,v2,v3)) = ∫(v1 + v2 + v3)*dΩ +#biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ +#liform((v1,v2)) = ∫(v1 + v2)*dΩ ############################################################################################ # Normal assembly -Y = MultiFieldFESpace([V,V]) -X = MultiFieldFESpace([U,U]) +Y = MultiFieldFESpace([V,V,V]) +X = MultiFieldFESpace([U,U,U]) u = get_trial_fe_basis(X) v = get_fe_basis(Y) @@ -46,17 +91,14 @@ A1 = assemble_matrix(assem,matdata) b1 = assemble_vector(assem,vecdata) A2,b2 = 
assemble_matrix_and_vector(assem,data); -assem11 = SparseMatrixAssembler(U,V,FullyAssembledRows()) -A11 = assemble_matrix((u1,v1)->∫(∇(u1)⋅∇(v1))*dΩ,assem11,U,V) - ############################################################################################ # Block MultiFieldStyle #mfs = BlockMultiFieldStyle() -mfs = BlockMultiFieldStyle()#2,(1,2)) +mfs = BlockMultiFieldStyle(2,(1,2)) -Yb = MultiFieldFESpace([V,V];style=mfs) -Xb = MultiFieldFESpace([U,U];style=mfs) +Yb = MultiFieldFESpace([V,V,V];style=mfs) +Xb = MultiFieldFESpace([U,U,U];style=mfs) ub = get_trial_fe_basis(Xb) vb = get_fe_basis(Yb) @@ -68,48 +110,11 @@ bvecdata = collect_cell_vector(Yb,liform(vb)) ############################################################################################ # Block Assembly -function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector) - o = one(eltype(A)) - for i in blockaxes(A,2) - fill!(y[i],0.0) - for j in blockaxes(A,2) - mul!(y[i],A[i,j],x[j],o,o) - end - end -end - -function is_same_vector(x::BlockVector,y::PVector,Ub,U) - y_fespace = GridapDistributed.change_ghost(y,U.gids) - x_fespace = mortar(map((xi,Ui) -> GridapDistributed.change_ghost(xi,Ui.gids),blocks(x),Ub.field_fe_space)) - - res = map(1:num_fields(Ub)) do i - xi = restrict_to_field(Ub,x_fespace,i) - yi = restrict_to_field(U,y_fespace,i) - xi ≈ yi - end - return all(res) -end - -function is_same_matrix(Ab::BlockMatrix,A::PSparseMatrix,Xb,X) - yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); - xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); - mul!(yb,Ab,xb) - - y = pfill(0.0,partition(axes(A)[1])) - x = pfill(1.0,partition(axes(A)[2])) - mul!(y,A,x) - - return is_same_vector(yb,y,Xb,X) -end - assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) - A1_blocks = assemble_matrix(assem_blocks,bmatdata); b1_blocks = assemble_vector(assem_blocks,bvecdata); is_same_matrix(A1_blocks,A1,Xb,X) -############################################################################################ - op = AffineFEOperator(biform,liform,X,Y) block_op = AffineFEOperator(biform,liform,Xb,Yb) is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) From 97611f509ce6ea22ab09407836c49987b0f9e9fc Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 16:14:21 +1000 Subject: [PATCH 22/56] Fixed block assembler tests --- src/Algebra.jl | 7 ++++--- test/BlockSparseMatrixAssemblersTests.jl | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 38af8d6a..203aa3a2 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -501,7 +501,7 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) # Move (I,J,V) triplets to the owner process of each row I. # The triplets are accompanyed which Jo which is the process column owner Jo = get_gid_owners(J,trial_dofs_gids_prange;ax=:cols) - t = _assemble_coo!(I,J,V,partition(rows);owners=Jo) + t = _assemble_coo!(I,J,V,rows;owners=Jo) # Here we can overlap computations # This is a good place to overlap since @@ -757,7 +757,7 @@ Base.wait(t::Matrix) = map(wait,t) # dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange # In the former case, gids is a vector of global test dof identifiers, while in the # latter, a vector of global trial dof identifiers -function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing;kwargs...) +function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing,kwargs...) 
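# (A Julia method signature admits a single `;`: everything after it,
#  including a trailing `kwargs...`, is a keyword argument, which is why the
#  keywords above are separated by commas. The forwarding pattern in
#  isolation, with hypothetical names:
#    outer(x; ghost=true, owners=nothing, kwargs...) = inner(x; ghost=ghost, kwargs...))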
if !ghost _setup_prange_without_ghosts(dofs_gids_prange) elseif isa(owners,Nothing) @@ -784,7 +784,8 @@ function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange}, end return gids_ax_slice, _owners end |> tuple_of_arrays - return map((p,g) -> _setup_prange(p,g;ghost=ghost,owners=_owners), dofs_gids_prange, gids_ax_slice) + + return map((p,g,o) -> _setup_prange(p,g;ghost=ghost,owners=o),dofs_gids_prange,gids_ax_slice,_owners) end # Create PRange for the rows of the linear system diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 04e0d9cc..c02333f2 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -94,8 +94,8 @@ A2,b2 = assemble_matrix_and_vector(assem,data); ############################################################################################ # Block MultiFieldStyle -#mfs = BlockMultiFieldStyle() -mfs = BlockMultiFieldStyle(2,(1,2)) +mfs = BlockMultiFieldStyle() +#mfs = BlockMultiFieldStyle(2,(1,2)) Yb = MultiFieldFESpace([V,V,V];style=mfs) Xb = MultiFieldFESpace([U,U,U];style=mfs) @@ -113,6 +113,7 @@ bvecdata = collect_cell_vector(Yb,liform(vb)) assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) A1_blocks = assemble_matrix(assem_blocks,bmatdata); b1_blocks = assemble_vector(assem_blocks,bvecdata); +is_same_vector(b1_blocks,b1,Yb,Y) is_same_matrix(A1_blocks,A1,Xb,X) op = AffineFEOperator(biform,liform,X,Y) From 635b649a5f51c9ea00355533430a5c190d49bd0c Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 16:23:20 +1000 Subject: [PATCH 23/56] Minor fix --- src/Algebra.jl | 2 +- test/BlockSparseMatrixAssemblersTests.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 203aa3a2..6cf49256 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -662,7 +662,7 @@ function _assemble_coo!(I,J,V,rows::Vector{<:PRange};owners=nothing) if isa(owners,Nothing) _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) else - _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[j]) + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[i,j]) end end end diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index c02333f2..3666dde2 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -94,8 +94,8 @@ A2,b2 = assemble_matrix_and_vector(assem,data); ############################################################################################ # Block MultiFieldStyle -mfs = BlockMultiFieldStyle() -#mfs = BlockMultiFieldStyle(2,(1,2)) +#mfs = BlockMultiFieldStyle() +mfs = BlockMultiFieldStyle(2,(1,2)) Yb = MultiFieldFESpace([V,V,V];style=mfs) Xb = MultiFieldFESpace([U,U,U];style=mfs) From 0f9bf914e29ac15da82ce838d21acb70c5297dab Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 17:37:36 +1000 Subject: [PATCH 24/56] Moved code around --- src/Algebra.jl | 862 ++++++++++++++++++++++++---------------------- src/MultiField.jl | 43 --- 2 files changed, 456 insertions(+), 449 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 6cf49256..58f3b63f 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -264,6 +264,9 @@ function Algebra.allocate_vector(::Type{<:PVector{V,A}},ids::PRange) where {V,A} PVector(values,partition(ids)) end + +# PSparseMatrix assembly + struct FullyAssembledRows end struct SubAssembledRows end @@ -355,6 +358,16 @@ function change_axes(a::DistributedAllocationCOO{A,B,<:PRange,<:PRange}, 
DistributedAllocationCOO(a.par_strategy,allocs,axes[1],axes[2]) end +function change_axes(a::MatrixBlock{<:DistributedAllocationCOO}, + axes::Tuple{<:Vector,<:Vector}) + block_ids = CartesianIndices(a.array) + rows, cols = axes + array = map(block_ids) do I + change_axes(a[I],(rows[I[1]],cols[I[2]])) + end + return ArrayBlock(array,a.touched) +end + function local_views(a::DistributedAllocationCOO) a.allocs end @@ -365,6 +378,11 @@ function local_views(a::DistributedAllocationCOO,test_dofs_gids_prange,trial_dof a.allocs end +function local_views(a::MatrixBlock{<:DistributedAllocationCOO}) + array = map(local_views,a.array) |> to_parray_of_arrays + return map(ai -> ArrayBlock(ai,a.touched),array) +end + function get_allocations(a::DistributedAllocationCOO) I,J,V = map(local_views(a)) do alloc alloc.I, alloc.J, alloc.V @@ -372,25 +390,15 @@ function get_allocations(a::DistributedAllocationCOO) return I,J,V end -get_test_gids(a::DistributedAllocationCOO) = a.test_dofs_gids_prange -get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange - -function first_gdof_from_ids(ids) - lid_to_gid = local_to_global(ids) - owner_to_lid = own_to_local(ids) - return (own_length(ids) > 0) ? Int(lid_to_gid[first(owner_to_lid)]) : 1 +function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) + tuple_of_array_of_parrays = map(get_allocations,a.array) |> tuple_of_arrays + return tuple_of_array_of_parrays end -function find_gid_and_owner(ighost_to_jghost,jindices) - jghost_to_local = ghost_to_local(jindices) - jlocal_to_global = local_to_global(jindices) - jlocal_to_owner = local_to_owner(jindices) - ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) - - ighost_to_global = jlocal_to_global[ighost_to_jlocal] - ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] - return ighost_to_global, ighost_to_owner -end +get_test_gids(a::DistributedAllocationCOO) = a.test_dofs_gids_prange +get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange +get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_test_gids,diag(a.array)) +get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_trial_gids,diag(a.array)) function Algebra.create_from_nz(a::PSparseMatrix) # For FullyAssembledRows the underlying Exchanger should @@ -405,48 +413,12 @@ function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows return A end -# The given ids are assumed to be a sub-set of the lids -function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) - glo_to_loc = global_to_local(a) - loc_to_gho = local_to_ghost(a) - - # First pass: Allocate - i = 0 - ghost_lids_touched = fill(false,ghost_length(a)) - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - i += 1 - end - end - gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) - - # Second pass: fill - i = 1 - fill!(ghost_lids_touched,false) - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - gids_ghost_lid_to_ghost_lid[i] = ghost_lid - i += 1 - end - end - - return gids_ghost_lid_to_ghost_lid +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) + f(x) = nothing + A, = _fa_create_from_nz_with_callback(f,a) + return A end -# Find the neighbours of partition1 trying -# to use those in partition2 as a hint -function 
_find_neighbours!(partition1, partition2) - partition2_snd, partition2_rcv = assembly_neighbors(partition2) - partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) - return assembly_neighbors(partition1; neighbors=partition2_graph) -end - function _fa_create_from_nz_with_callback(callback,a) # Recover some data @@ -485,6 +457,12 @@ function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) return A end +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) + f(x) = nothing + A, = _sa_create_from_nz_with_callback(f,f,a) + return A +end + function _sa_create_from_nz_with_callback(callback,async_callback,a) # Recover some data I,J,V = get_allocations(a) @@ -537,6 +515,9 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) return A, b end + +# PVector assembly + struct PVectorBuilder{T,B} local_vector_type::Type{T} par_strategy::B @@ -595,379 +576,98 @@ function local_views(a::PVectorAllocationTrackOnlyValues,rows) a.values end -# to_global! & to_local! analogs, for dispatching in block assembly - -function to_local_indices!(I,ids::PRange;kwargs...) - map(to_local!,I,partition(ids)) +function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) + rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) + _rhs_callback(a,rows) end -function to_global_indices!(I,ids::PRange;kwargs...) - map(to_global!,I,partition(ids)) +function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:SubAssembledRows}) + # This point MUST NEVER be reached. If reached there is an inconsistency + # in the parallel code in charge of vector assembly + @assert false end -function get_gid_owners(I,ids::PRange;kwargs...) - map(I,partition(ids)) do I, indices - glo_to_loc = global_to_local(indices) - loc_to_own = local_to_owner(indices) - map(x->loc_to_own[glo_to_loc[x]], I) - end -end +function _rhs_callback(row_partitioned_vector_partition,rows) + # The ghost values in row_partitioned_vector_partition are + # aligned with the FESpace but not with the ghost values in the rows of A + b_fespace = PVector(row_partitioned_vector_partition.values, + partition(row_partitioned_vector_partition.test_dofs_gids_prange)) -for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] - @eval begin - function $f(I::Vector,ids::AbstractVector{<:PRange};kwargs...) 
- map($f,I,ids) - end + # This one is aligned with the rows of A + b = similar(b_fespace,eltype(b_fespace),(rows,)) - function $f(I::Matrix,ids::AbstractVector{<:PRange};ax=:rows) - @check ax ∈ [:rows,:cols] - block_ids = CartesianIndices(I) - map(block_ids) do id - i = id[1]; j = id[2]; - if ax == :rows - $f(I[i,j],ids[i]) - else - $f(I[i,j],ids[j]) - end - end + # First transfer owned values + b .= b_fespace + + # Now transfer ghost + function transfer_ghost(b,b_fespace,ids,ids_fespace) + num_ghosts_vec = ghost_length(ids) + gho_to_loc_vec = ghost_to_local(ids) + loc_to_glo_vec = local_to_global(ids) + gid_to_lid_fe = global_to_local(ids_fespace) + for ghost_lid_vec in 1:num_ghosts_vec + lid_vec = gho_to_loc_vec[ghost_lid_vec] + gid = loc_to_glo_vec[lid_vec] + lid_fespace = gid_to_lid_fe[gid] + b[lid_vec] = b_fespace[lid_fespace] end end -end + map( + transfer_ghost, + partition(b), + partition(b_fespace), + b.index_partition, + b_fespace.index_partition) -function _setup_matrix(values,rows::PRange,cols::PRange) - return PSparseMatrix(values,partition(rows),partition(cols)) + return b end -function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) - block_ids = CartesianIndices((length(rows),length(cols))) - block_mats = map(block_ids) do I - block_values = map(v -> blocks(v)[I],values) - return _setup_matrix(block_values,rows[I[1]],cols[I[2]]) - end - return mortar(block_mats) +function Algebra.create_from_nz(a::PVector) + assemble!(a) |> wait + return a end -function _assemble_coo!(I,J,V,rows::PRange;owners=nothing) - if isa(owners,Nothing) - PArrays.assemble_coo!(I,J,V,partition(rows)) - else - assemble_coo_with_column_owner!(I,J,V,partition(rows),owners) +function Algebra.create_from_nz( + a::DistributedAllocationCOO{<:FullyAssembledRows}, + c_fespace::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) + + function callback(rows) + _rhs_callback(c_fespace,rows) end + + A,b = _fa_create_from_nz_with_callback(callback,a) + return A,b end -function _assemble_coo!(I,J,V,rows::Vector{<:PRange};owners=nothing) - block_ids = CartesianIndices(I) - map(block_ids) do id - i = id[1]; j = id[2]; - if isa(owners,Nothing) - _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) - else - _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[i,j]) - end - end +struct PVectorAllocationTrackTouchedAndValues{A,B,C} + allocations::A + values::B + test_dofs_gids_prange::C end -function assemble_coo_with_column_owner!(I,J,V,row_partition,Jown) - """ - Returns three JaggedArrays with the coo triplets - to be sent to the corresponding owner parts in parts_snd - """ - function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) - global_to_local_row = global_to_local(row_lids) - local_row_to_owner = local_to_owner(row_lids) - owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) - ptrs = zeros(Int32,length(parts_snd)+1) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - ptrs[owner_to_i[owner]+1] +=1 - end - end - PArrays.length_to_ptrs!(ptrs) - gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) - gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) - jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) - v_snd_data = zeros(eltype(k_v),ptrs[end]-1) - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - gj = k_gj[k] - v = k_v[k] - p = ptrs[owner_to_i[owner]] - gi_snd_data[p] = gi - 
gj_snd_data[p] = gj - jo_snd_data[p] = k_jo[k] - v_snd_data[p] = v - k_v[k] = zero(v) - ptrs[owner_to_i[owner]] += 1 - end - end - PArrays.rewind_ptrs!(ptrs) - gi_snd = JaggedArray(gi_snd_data,ptrs) - gj_snd = JaggedArray(gj_snd_data,ptrs) - jo_snd = JaggedArray(jo_snd_data,ptrs) - v_snd = JaggedArray(v_snd_data,ptrs) - gi_snd, gj_snd, jo_snd, v_snd - end - """ - Pushes to coo_entries_with_column_owner the tuples - gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes - """ - function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - current_n = length(k_gi) - new_n = current_n + length(gi_rcv.data) - resize!(k_gi,new_n) - resize!(k_gj,new_n) - resize!(k_jo,new_n) - resize!(k_v,new_n) - for p in 1:length(gi_rcv.data) - k_gi[current_n+p] = gi_rcv.data[p] - k_gj[current_n+p] = gj_rcv.data[p] - k_jo[current_n+p] = jo_rcv.data[p] - k_v[current_n+p] = v_rcv.data[p] - end +function Algebra.create_from_nz( + a::DistributedAllocationCOO{<:SubAssembledRows}, + c_fespace::PVectorAllocationTrackOnlyValues{<:SubAssembledRows}) + + function callback(rows) + _rhs_callback(c_fespace,rows) end - part = linear_indices(row_partition) - parts_snd, parts_rcv = assembly_neighbors(row_partition) - coo_entries_with_column_owner = map(tuple,I,J,Jown,V) - gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays - graph = ExchangeGraph(parts_snd,parts_rcv) - t1 = exchange(gi_snd,graph) - t2 = exchange(gj_snd,graph) - t3 = exchange(jo_snd,graph) - t4 = exchange(v_snd,graph) - @async begin - gi_rcv = fetch(t1) - gj_rcv = fetch(t2) - jo_rcv = fetch(t3) - v_rcv = fetch(t4) - map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - I,J,Jown,V + + function async_callback(b) + # now we can assemble contributions + assemble!(b) end -end -Base.wait(t::Matrix) = map(wait,t) + A,b = _sa_create_from_nz_with_callback(callback,async_callback,a) + return A,b +end -# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange -# In the former case, gids is a vector of global test dof identifiers, while in the -# latter, a vector of global trial dof identifiers -function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing,kwargs...) - if !ghost - _setup_prange_without_ghosts(dofs_gids_prange) - elseif isa(owners,Nothing) - _setup_prange_with_ghosts(dofs_gids_prange,gids) - else - _setup_prange_with_ghosts(dofs_gids_prange,gids,owners) - end +struct ArrayAllocationTrackTouchedAndValues{A} + touched::Vector{Bool} + values::A end -function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange}, - gids::AbstractMatrix; - ax=:rows,ghost=true,owners=nothing) - @check ax ∈ (:rows,:cols) - block_ids = LinearIndices(dofs_gids_prange) - - gids_ax_slice, _owners = map(block_ids,dofs_gids_prange) do id,prange - gids_ax_slice = (ax == :rows) ? 
gids[id,:] : gids[:,id] - _owners = nothing - if ghost - gids_ax_slice = map(x -> union(x...), to_parray_of_arrays(gids_ax_slice)) - if !isa(owners,Nothing) # Recompute owners for the union - _owners = get_gid_owners(gids_ax_slice,prange) - end - end - return gids_ax_slice, _owners - end |> tuple_of_arrays - - return map((p,g,o) -> _setup_prange(p,g;ghost=ghost,owners=o),dofs_gids_prange,gids_ax_slice,_owners) -end - -# Create PRange for the rows of the linear system -# without local ghost dofs as per required in the -# FullyAssembledRows() parallel assembly strategy -function _setup_prange_without_ghosts(dofs_gids_prange::PRange) - ngdofs = length(dofs_gids_prange) - indices = map(partition(dofs_gids_prange)) do dofs_indices - owner = part_id(dofs_indices) - own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices = GhostIndices(ngdofs,Int64[],Int32[]) - OwnAndGhostIndices(own_indices,ghost_indices) - end - return PRange(indices) -end - -# Here we are assuming that the sparse communication graph underlying test_dofs_gids_partition -# is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. -# The precondition required for the consistency of any parallel assembly process in GridapDistributed -# is that each processor can determine locally with a single layer of ghost cells the global indices and associated -# processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities -# (i.e., either cells or faces). -function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids) - ngdofs = length(dofs_gids_prange) - dofs_gids_partition = partition(dofs_gids_prange) - - # Selected ghost ids -> dof PRange ghost ids - gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) - - # Selected ghost ids -> [global dof ids, owner processor ids] - gids_ghost_to_global, gids_ghost_to_owner = map( - find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays - - return _setup_prange_impl_(ngdofs,dofs_gids_partition,gids_ghost_to_global,gids_ghost_to_owner) -end - -# Here we cannot assume that the sparse communication graph underlying -# trial_dofs_gids_partition is a superset of the one underlying indices. -# Here we chould check whether it is included and call _find_neighbours!() -# if this is the case. At present, we are not taking advantage of this, -# but let the parallel scalable algorithm to compute the reciprocal to do the job. 
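# A self-contained sketch of the "no ghosts" layout built by
# _setup_prange_without_ghosts above, using the same PartitionedArrays
# building blocks directly (debug backend, 2 parts, 8 global dofs split
# evenly; sizes and ids are illustrative only):
#
#   using PartitionedArrays
#   ranks = with_debug() do distribute
#     distribute(LinearIndices((2,)))
#   end
#   ngdofs = 8
#   indices = map(ranks) do rank
#     own_gids = collect(((rank-1)*4+1):(rank*4))   # part 1 owns 1:4, part 2 owns 5:8
#     own   = OwnIndices(ngdofs,rank,own_gids)
#     ghost = GhostIndices(ngdofs,Int64[],Int32[])  # no ghost rows at all
#     OwnAndGhostIndices(own,ghost)
#   end
#   rows = PRange(indices)   # each part stores only the ids it owns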
-function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids,owners) - ngdofs = length(dofs_gids_prange) - dofs_gids_partition = partition(dofs_gids_prange) - - # Selected ghost ids -> [global dof ids, owner processor ids] - gids_ghost_to_global, gids_ghost_to_owner = map( - gids,owners,dofs_gids_partition) do gids, owners, indices - ghost_touched = Dict{Int,Bool}() - ghost_to_global = Int64[] - ghost_to_owner = Int64[] - me = part_id(indices) - for (j,jo) in zip(gids,owners) - if jo != me - if !haskey(ghost_touched,j) - push!(ghost_to_global,j) - push!(ghost_to_owner,jo) - ghost_touched[j] = true - end - end - end - ghost_to_global, ghost_to_owner - end |> tuple_of_arrays - - return _setup_prange_impl_(ngdofs, - dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner; - discover_neighbours=false) -end - -function _setup_prange_impl_(ngdofs, - dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner; - discover_neighbours=true) - indices = map(dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner - owner = part_id(dofs_indices) - own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) - OwnAndGhostIndices(own_indices,ghost_indices) - end - if discover_neighbours - _find_neighbours!(indices,dofs_gids_partition) - end - return PRange(indices) -end - -function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) - rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) - _rhs_callback(a,rows) -end - -function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:SubAssembledRows}) - # This point MUST NEVER be reached. If reached there is an inconsistency - # in the parallel code in charge of vector assembly - @assert false -end - -function _rhs_callback(row_partitioned_vector_partition,rows) - # The ghost values in row_partitioned_vector_partition are - # aligned with the FESpace but not with the ghost values in the rows of A - b_fespace = PVector(row_partitioned_vector_partition.values, - partition(row_partitioned_vector_partition.test_dofs_gids_prange)) - - # This one is aligned with the rows of A - b = similar(b_fespace,eltype(b_fespace),(rows,)) - - # First transfer owned values - b .= b_fespace - - # Now transfer ghost - function transfer_ghost(b,b_fespace,ids,ids_fespace) - num_ghosts_vec = ghost_length(ids) - gho_to_loc_vec = ghost_to_local(ids) - loc_to_glo_vec = local_to_global(ids) - gid_to_lid_fe = global_to_local(ids_fespace) - for ghost_lid_vec in 1:num_ghosts_vec - lid_vec = gho_to_loc_vec[ghost_lid_vec] - gid = loc_to_glo_vec[lid_vec] - lid_fespace = gid_to_lid_fe[gid] - b[lid_vec] = b_fespace[lid_fespace] - end - end - map( - transfer_ghost, - partition(b), - partition(b_fespace), - b.index_partition, - b_fespace.index_partition) - - return b -end - -function Algebra.create_from_nz(a::PVector) - assemble!(a) |> wait - return a -end - -function Algebra.create_from_nz( - a::DistributedAllocationCOO{<:FullyAssembledRows}, - c_fespace::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) - - function callback(rows) - _rhs_callback(c_fespace,rows) - end - - A,b = _fa_create_from_nz_with_callback(callback,a) - return A,b -end - -struct PVectorAllocationTrackTouchedAndValues{A,B,C} - allocations::A - values::B - test_dofs_gids_prange::C -end - -function Algebra.create_from_nz( - a::DistributedAllocationCOO{<:SubAssembledRows}, - 
c_fespace::PVectorAllocationTrackOnlyValues{<:SubAssembledRows}) - - function callback(rows) - _rhs_callback(c_fespace,rows) - end - - function async_callback(b) - # now we can assemble contributions - assemble!(b) - end - - A,b = _sa_create_from_nz_with_callback(callback,async_callback,a) - return A,b -end - -struct ArrayAllocationTrackTouchedAndValues{A} - touched::Vector{Bool} - values::A -end - -Gridap.Algebra.LoopStyle(::Type{<:ArrayAllocationTrackTouchedAndValues}) = Gridap.Algebra.Loop() +Gridap.Algebra.LoopStyle(::Type{<:ArrayAllocationTrackTouchedAndValues}) = Gridap.Algebra.Loop() function local_views(a::PVectorAllocationTrackTouchedAndValues,rows) @@ -1066,3 +766,353 @@ function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) end return b end + + +# Common Assembly Utilities + +function first_gdof_from_ids(ids) + if own_length(ids) == 0 + return 1 + end + lid_to_gid = local_to_global(ids) + owned_to_lid = own_to_local(ids) + return Int(lid_to_gid[first(owned_to_lid)]) +end + +function find_gid_and_owner(ighost_to_jghost,jindices) + jghost_to_local = ghost_to_local(jindices) + jlocal_to_global = local_to_global(jindices) + jlocal_to_owner = local_to_owner(jindices) + ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) + + ighost_to_global = jlocal_to_global[ighost_to_jlocal] + ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] + return ighost_to_global, ighost_to_owner +end + +# The given ids are assumed to be a sub-set of the lids +function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) + glo_to_loc = global_to_local(a) + loc_to_gho = local_to_ghost(a) + + # First pass: Allocate + i = 0 + ghost_lids_touched = fill(false,ghost_length(a)) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + i += 1 + end + end + gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) + + # Second pass: fill + i = 1 + fill!(ghost_lids_touched,false) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + gids_ghost_lid_to_ghost_lid[i] = ghost_lid + i += 1 + end + end + + return gids_ghost_lid_to_ghost_lid +end + +# Find the neighbours of partition1 trying +# to use those in partition2 as a hint +function _find_neighbours!(partition1, partition2) + partition2_snd, partition2_rcv = assembly_neighbors(partition2) + partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) + return assembly_neighbors(partition1; neighbors=partition2_graph) +end + +# to_global! & to_local! analogs, needed for dispatching in block assembly + +function to_local_indices!(I,ids::PRange;kwargs...) + map(to_local!,I,partition(ids)) +end + +function to_global_indices!(I,ids::PRange;kwargs...) + map(to_global!,I,partition(ids)) +end + +function get_gid_owners(I,ids::PRange;kwargs...) + map(I,partition(ids)) do I, indices + glo_to_loc = global_to_local(indices) + loc_to_own = local_to_owner(indices) + map(x->loc_to_own[glo_to_loc[x]], I) + end +end + +for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] + @eval begin + function $f(I::Vector,ids::AbstractVector{<:PRange};kwargs...) 
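# (The surrounding `for f in [...] @eval ... end` loop stamps out the Vector
#  and Matrix methods once per function name instead of writing three
#  near-identical copies by hand. The same idiom in isolation, with
#  hypothetical names:
#    for f in (:foo!, :bar!)
#      @eval $f(I::Vector,ids::Vector) = map($f,I,ids)
#    end )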
+ map($f,I,ids) + end + + function $f(I::Matrix,ids::AbstractVector{<:PRange};ax=:rows) + @check ax ∈ [:rows,:cols] + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if ax == :rows + $f(I[i,j],ids[i]) + else + $f(I[i,j],ids[j]) + end + end + end + end +end + +# _setup_matrix : local matrices + global PRanges -> Global matrix + +function _setup_matrix(values,rows::PRange,cols::PRange) + return PSparseMatrix(values,partition(rows),partition(cols)) +end + +function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) + block_ids = CartesianIndices((length(rows),length(cols))) + block_mats = map(block_ids) do I + block_values = map(v -> blocks(v)[I],values) + return _setup_matrix(block_values,rows[I[1]],cols[I[2]]) + end + return mortar(block_mats) +end + +# _assemble_coo! : local coo triplets + global PRange -> Global coo values + +function _assemble_coo!(I,J,V,rows::PRange;owners=nothing) + if isa(owners,Nothing) + PArrays.assemble_coo!(I,J,V,partition(rows)) + else + assemble_coo_with_column_owner!(I,J,V,partition(rows),owners) + end +end + +function _assemble_coo!(I,J,V,rows::Vector{<:PRange};owners=nothing) + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if isa(owners,Nothing) + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) + else + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[i,j]) + end + end +end + +function assemble_coo_with_column_owner!(I,J,V,row_partition,Jown) + """ + Returns three JaggedArrays with the coo triplets + to be sent to the corresponding owner parts in parts_snd + """ + function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) + global_to_local_row = global_to_local(row_lids) + local_row_to_owner = local_to_owner(row_lids) + owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) + ptrs = zeros(Int32,length(parts_snd)+1) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + ptrs[owner_to_i[owner]+1] +=1 + end + end + PArrays.length_to_ptrs!(ptrs) + gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) + gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) + jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) + v_snd_data = zeros(eltype(k_v),ptrs[end]-1) + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + gj = k_gj[k] + v = k_v[k] + p = ptrs[owner_to_i[owner]] + gi_snd_data[p] = gi + gj_snd_data[p] = gj + jo_snd_data[p] = k_jo[k] + v_snd_data[p] = v + k_v[k] = zero(v) + ptrs[owner_to_i[owner]] += 1 + end + end + PArrays.rewind_ptrs!(ptrs) + gi_snd = JaggedArray(gi_snd_data,ptrs) + gj_snd = JaggedArray(gj_snd_data,ptrs) + jo_snd = JaggedArray(jo_snd_data,ptrs) + v_snd = JaggedArray(v_snd_data,ptrs) + gi_snd, gj_snd, jo_snd, v_snd + end + """ + Pushes to coo_entries_with_column_owner the tuples + gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes + """ + function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + current_n = length(k_gi) + new_n = current_n + length(gi_rcv.data) + resize!(k_gi,new_n) + resize!(k_gj,new_n) + resize!(k_jo,new_n) + resize!(k_v,new_n) + for p in 1:length(gi_rcv.data) + k_gi[current_n+p] = gi_rcv.data[p] + k_gj[current_n+p] = gj_rcv.data[p] + k_jo[current_n+p] = jo_rcv.data[p] + k_v[current_n+p] = v_rcv.data[p] + end + end + part = linear_indices(row_partition) 
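# (setup_snd above uses the standard two-pass packing idiom: a first sweep
#  counts entries per destination part and length_to_ptrs! turns the counts
#  into offsets; a second sweep writes the data while advancing ptrs, and
#  rewind_ptrs! restores the offsets. JaggedArray(data,ptrs) then exposes the
#  message for neighbour i as the contiguous slice data[ptrs[i]:ptrs[i+1]-1].)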
+ parts_snd, parts_rcv = assembly_neighbors(row_partition) + coo_entries_with_column_owner = map(tuple,I,J,Jown,V) + gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays + graph = ExchangeGraph(parts_snd,parts_rcv) + t1 = exchange(gi_snd,graph) + t2 = exchange(gj_snd,graph) + t3 = exchange(jo_snd,graph) + t4 = exchange(v_snd,graph) + @async begin + gi_rcv = fetch(t1) + gj_rcv = fetch(t2) + jo_rcv = fetch(t3) + v_rcv = fetch(t4) + map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + I,J,Jown,V + end +end + +Base.wait(t::Matrix) = map(wait,t) + +# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange +# In the former case, gids is a vector of global test dof identifiers, while in the +# latter, a vector of global trial dof identifiers +function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing,kwargs...) + if !ghost + _setup_prange_without_ghosts(dofs_gids_prange) + elseif isa(owners,Nothing) + _setup_prange_with_ghosts(dofs_gids_prange,gids) + else + _setup_prange_with_ghosts(dofs_gids_prange,gids,owners) + end +end + +function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange}, + gids::AbstractMatrix; + ax=:rows,ghost=true,owners=nothing) + @check ax ∈ (:rows,:cols) + block_ids = LinearIndices(dofs_gids_prange) + + gids_ax_slice, _owners = map(block_ids,dofs_gids_prange) do id,prange + gids_ax_slice = (ax == :rows) ? gids[id,:] : gids[:,id] + _owners = nothing + if ghost + gids_ax_slice = map(x -> union(x...), to_parray_of_arrays(gids_ax_slice)) + if !isa(owners,Nothing) # Recompute owners for the union + _owners = get_gid_owners(gids_ax_slice,prange) + end + end + return gids_ax_slice, _owners + end |> tuple_of_arrays + + return map((p,g,o) -> _setup_prange(p,g;ghost=ghost,owners=o),dofs_gids_prange,gids_ax_slice,_owners) +end + +# Create PRange for the rows of the linear system +# without local ghost dofs as per required in the +# FullyAssembledRows() parallel assembly strategy +function _setup_prange_without_ghosts(dofs_gids_prange::PRange) + ngdofs = length(dofs_gids_prange) + indices = map(partition(dofs_gids_prange)) do dofs_indices + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,Int64[],Int32[]) + OwnAndGhostIndices(own_indices,ghost_indices) + end + return PRange(indices) +end + +# Here we are assuming that the sparse communication graph underlying test_dofs_gids_partition +# is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. +# The precondition required for the consistency of any parallel assembly process in GridapDistributed +# is that each processor can determine locally with a single layer of ghost cells the global indices and associated +# processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities +# (i.e., either cells or faces). 
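# A toy round through the exchange machinery used above, on the debug
# backend: two parts, each sending one message to the other. Assumes the
# JaggedArray constructor accepts a vector of per-neighbour messages;
# payloads are illustrative only:
#
#   using PartitionedArrays
#   ranks = with_debug() do distribute
#     distribute(LinearIndices((2,)))
#   end
#   neighbours = map(rank -> [rank == 1 ? 2 : 1], ranks)  # symmetric graph
#   graph = ExchangeGraph(neighbours,neighbours)
#   snd = map(rank -> JaggedArray([[10*rank]]), ranks)    # one message each
#   rcv = fetch(exchange(snd,graph))  # part 1 receives [20], part 2 receives [10]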
+function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> dof PRange ghost ids + gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs,dofs_gids_partition,gids_ghost_to_global,gids_ghost_to_owner) +end + +# Here we cannot assume that the sparse communication graph underlying +# trial_dofs_gids_partition is a superset of the one underlying indices. +# Here we chould check whether it is included and call _find_neighbours!() +# if this is the case. At present, we are not taking advantage of this, +# but let the parallel scalable algorithm to compute the reciprocal to do the job. +function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids,owners) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + gids,owners,dofs_gids_partition) do gids, owners, indices + ghost_touched = Dict{Int,Bool}() + ghost_to_global = Int64[] + ghost_to_owner = Int64[] + me = part_id(indices) + for (j,jo) in zip(gids,owners) + if jo != me + if !haskey(ghost_touched,j) + push!(ghost_to_global,j) + push!(ghost_to_owner,jo) + ghost_touched[j] = true + end + end + end + ghost_to_global, ghost_to_owner + end |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=false) +end + +function _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=true) + indices = map(dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) + OwnAndGhostIndices(own_indices,ghost_indices) + end + if discover_neighbours + _find_neighbours!(indices,dofs_gids_partition) + end + return PRange(indices) +end diff --git a/src/MultiField.jl b/src/MultiField.jl index bd08ea75..a4ae661e 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -527,46 +527,3 @@ function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseM cols = get_cols(a) map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end - -# Assembly - -function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) - tuple_of_array_of_parrays = map(get_allocations,a.array) |> tuple_of_arrays - #tuple_of_parray_of_arrays = map(to_parray_of_arrays,tuple_of_array_of_parrays) - return tuple_of_array_of_parrays -end - -function get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) - return map(get_test_gids,diag(a.array)) -end - -function get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) - return map(get_trial_gids,diag(a.array)) -end - -function change_axes(a::MatrixBlock{<:DistributedAllocationCOO},axes::Tuple{<:Vector,<:Vector}) - block_ids = CartesianIndices(a.array) - rows, cols = axes - - array = map(block_ids) do I - change_axes(a[I],(rows[I[1]],cols[I[2]])) - end - return 
ArrayBlock(array,a.touched) -end - -function local_views(a::MatrixBlock{<:DistributedAllocationCOO}) - array = map(local_views,a.array) |> to_parray_of_arrays - return map(ai -> ArrayBlock(ai,a.touched),array) -end - -function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) - f(x) = nothing - A, = _fa_create_from_nz_with_callback(f,a) - return A -end - -function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) - f(x) = nothing - A, = _sa_create_from_nz_with_callback(f,f,a) - return A -end From 7bc9706aef2ae8b9556420c84fbcb1bda973c470 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 19:12:08 +1000 Subject: [PATCH 25/56] Added block assemblers tests to testset --- test/BlockSparseMatrixAssemblersTests.jl | 122 +++++++++--------- test/TestApp/src/TestApp.jl | 1 + test/mpi/runtests_np4_body.jl | 3 + .../BlockSparseMatrixAssemblersTests.jl | 17 +++ test/sequential/runtests.jl | 3 + 5 files changed, 87 insertions(+), 59 deletions(-) create mode 100644 test/sequential/BlockSparseMatrixAssemblersTests.jl diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 3666dde2..400b0183 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -53,72 +53,76 @@ function is_same_matrix(Ab::BlockMatrix,A::PSparseMatrix,Xb,X) return is_same_vector(yb,y,Xb,X) end -nparts = (2,2) -parts = with_debug() do distribute - distribute(LinearIndices((prod(nparts),))) +function _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + biform, liform = weakform + + # Normal assembly + Y = MultiFieldFESpace(fill(V,n_spaces)) + X = MultiFieldFESpace(fill(U,n_spaces)) + + u = get_trial_fe_basis(X) + v = get_fe_basis(Y) + + data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v)) + matdata = collect_cell_matrix(X,Y,biform(u,v)) + vecdata = collect_cell_vector(Y,liform(v)) + + assem = SparseMatrixAssembler(X,Y,FullyAssembledRows()) + A1 = assemble_matrix(assem,matdata) + b1 = assemble_vector(assem,vecdata) + A2,b2 = assemble_matrix_and_vector(assem,data); + + # Block Assembly + Yb = MultiFieldFESpace(fill(V,n_spaces);style=mfs) + Xb = MultiFieldFESpace(fill(U,n_spaces);style=mfs) + + ub = get_trial_fe_basis(Xb) + vb = get_fe_basis(Yb) + + bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb)) + bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb)) + bvecdata = collect_cell_vector(Yb,liform(vb)) + + assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) + A1_blocks = assemble_matrix(assem_blocks,bmatdata); + b1_blocks = assemble_vector(assem_blocks,bvecdata); + @test is_same_vector(b1_blocks,b1,Yb,Y) + @test is_same_matrix(A1_blocks,A1,Xb,X) + + A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata) + @test is_same_vector(b2_blocks,b2,Yb,Y) + @test is_same_matrix(A2_blocks,A2,Xb,X) + + op = AffineFEOperator(biform,liform,X,Y) + block_op = AffineFEOperator(biform,liform,Xb,Yb) + @test is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) + @test is_same_matrix(get_matrix(block_op),get_matrix(op),Xb,X) end -sol(x) = sum(x) - -model = CartesianDiscreteModel(parts,nparts,(0.0,1.0,0.0,1.0),(8,8)) -Ω = Triangulation(model) - -reffe = LagrangianRefFE(Float64,QUAD,1) -V = FESpace(Ω, reffe) -U = TrialFESpace(sol,V) - -dΩ = Measure(Ω, 4) -biform((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 + u2⋅v1 + v3⋅u3 + v3⋅u1 + v1⋅u3)*dΩ -liform((v1,v2,v3)) = ∫(v1 + v2 + v3)*dΩ -#biform((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + 
u2⋅v2 + u1⋅v2 - u2⋅v1)*dΩ -#liform((v1,v2)) = ∫(v1 + v2)*dΩ - -############################################################################################ -# Normal assembly - -Y = MultiFieldFESpace([V,V,V]) -X = MultiFieldFESpace([U,U,U]) - -u = get_trial_fe_basis(X) -v = get_fe_basis(Y) - -data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v)) -matdata = collect_cell_matrix(X,Y,biform(u,v)) -vecdata = collect_cell_vector(Y,liform(v)) - -assem = SparseMatrixAssembler(X,Y,FullyAssembledRows()) -A1 = assemble_matrix(assem,matdata) -b1 = assemble_vector(assem,vecdata) -A2,b2 = assemble_matrix_and_vector(assem,data); - ############################################################################################ -# Block MultiFieldStyle -#mfs = BlockMultiFieldStyle() -mfs = BlockMultiFieldStyle(2,(1,2)) +function main(distribute,parts) + ranks = distribute(LinearIndices((prod(parts),))) -Yb = MultiFieldFESpace([V,V,V];style=mfs) -Xb = MultiFieldFESpace([U,U,U];style=mfs) + model = CartesianDiscreteModel(ranks,parts,(0,1,0,1),(8,8)) + Ω = Triangulation(model) -ub = get_trial_fe_basis(Xb) -vb = get_fe_basis(Yb) + sol(x) = sum(x) + reffe = LagrangianRefFE(Float64,QUAD,1) + V = FESpace(Ω, reffe; dirichlet_tags="boundary") + U = TrialFESpace(sol,V) -bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb)) -bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb)) -bvecdata = collect_cell_vector(Yb,liform(vb)) + dΩ = Measure(Ω, 2) + biform2((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ + liform2((v1,v2)) = ∫(v1 + v2)*dΩ + biform3((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 + u3⋅v2 + u2⋅v3)*dΩ + liform3((v1,v2,v3)) = ∫(v1 + v2 + v3)*dΩ -############################################################################################ -# Block Assembly - -assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) -A1_blocks = assemble_matrix(assem_blocks,bmatdata); -b1_blocks = assemble_vector(assem_blocks,bvecdata); -is_same_vector(b1_blocks,b1,Yb,Y) -is_same_matrix(A1_blocks,A1,Xb,X) - -op = AffineFEOperator(biform,liform,X,Y) -block_op = AffineFEOperator(biform,liform,Xb,Yb) -is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) -is_same_matrix(get_matrix(block_op),get_matrix(op),Xb,X) + for (n_spaces,weakform) in zip([2,3],[(biform2,liform2),(biform3,liform3)]) + for mfs in [BlockMultiFieldStyle(),BlockMultiFieldStyle(2,(1,n_spaces-1))] + _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + end + end +end end \ No newline at end of file diff --git a/test/TestApp/src/TestApp.jl b/test/TestApp/src/TestApp.jl index f6c40936..2e5aed87 100644 --- a/test/TestApp/src/TestApp.jl +++ b/test/TestApp/src/TestApp.jl @@ -12,4 +12,5 @@ module TestApp include("../../HeatEquationTests.jl") include("../../StokesOpenBoundaryTests.jl") include("../../AdaptivityTests.jl") + include("../../BlockSparseMatrixAssemblersTests.jl") end \ No newline at end of file diff --git a/test/mpi/runtests_np4_body.jl b/test/mpi/runtests_np4_body.jl index 0e8fbb6a..70f9c01c 100644 --- a/test/mpi/runtests_np4_body.jl +++ b/test/mpi/runtests_np4_body.jl @@ -45,5 +45,8 @@ function all_tests(distribute,parts) PArrays.toc!(t,"Adaptivity") end + TestApp.BlockSparseMatrixAssemblersTests.main(distribute,parts) + PArrays.toc!(t,"BlockSparseMatrixAssemblers") + display(t) end diff --git a/test/sequential/BlockSparseMatrixAssemblersTests.jl b/test/sequential/BlockSparseMatrixAssemblersTests.jl new file mode 100644 index 00000000..9c60126d --- /dev/null +++ b/test/sequential/BlockSparseMatrixAssemblersTests.jl @@ -0,0 
+1,17 @@ +module BlockSparseMatrixAssemblersTestsSeq +using PartitionedArrays +include("../BlockSparseMatrixAssemblersTests.jl") + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,2)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,1)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(1,2)) +end + +end # module \ No newline at end of file diff --git a/test/sequential/runtests.jl b/test/sequential/runtests.jl index 12f82145..57da04ef 100644 --- a/test/sequential/runtests.jl +++ b/test/sequential/runtests.jl @@ -34,5 +34,8 @@ end @time @testset "StokesHdivDGTests.jl" begin include("StokesHdivDGTests.jl") end +@time @testset "BlockSparseMatrixAssemblers" begin + include("BlockSparseMatrixAssemblersTests.jl") +end end # module From aee8566429d7c8255e738e113e2e6c7a9e20d4e7 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 19:17:02 +1000 Subject: [PATCH 26/56] Updated NEWS.md --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index d268845a..59fce9b5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +### Added + +- Added support for distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124). + ## [0.3.0] - 2023-08-16 ### Changed From 25ece1f8b88c119d5967a97387612d313fc05bee Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 22 Aug 2023 19:21:40 +1000 Subject: [PATCH 27/56] Updated TestApp dependencies --- test/TestApp/.gitignore | 1 + test/TestApp/Project.toml | 1 + 2 files changed, 2 insertions(+) create mode 100644 test/TestApp/.gitignore diff --git a/test/TestApp/.gitignore b/test/TestApp/.gitignore new file mode 100644 index 00000000..ba39cc53 --- /dev/null +++ b/test/TestApp/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/test/TestApp/Project.toml b/test/TestApp/Project.toml index 9754ac89..31c314de 100644 --- a/test/TestApp/Project.toml +++ b/test/TestApp/Project.toml @@ -3,6 +3,7 @@ uuid = "3ba29202-0f57-4e69-8cbd-5c57d4c4860a" version = "0.1.0" [deps] +BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" Gridap = "56d4f2e9-7ea1-5844-9cf6-b9c51ca7ce8e" GridapDistributed = "f9701e48-63b3-45aa-9a63-9bc6c271f355" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" From 66510e41ca70f8d66fd8caa3b5c3ea9a01389135 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Fri, 25 Aug 2023 18:55:43 +1000 Subject: [PATCH 28/56] Improvements to change_ghost --- src/Algebra.jl | 68 ++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 58f3b63f..58651237 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -5,14 +5,14 @@ end # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.CounterCOO{T,A}, axes::A) where {T,A} - b=Algebra.CounterCOO{T}(axes) + b = Algebra.CounterCOO{T}(axes) b.nnz = a.nnz b end # This might go to Gridap in the future. We keep it here for the moment. 
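# (Both change_axes methods here rebind the target axes of a COO counter or
#  allocation while keeping the entries recorded so far, i.e. the nnz count
#  and the I,J,V triplets. The analogous method on DistributedAllocationCOO
#  is what lets block assembly swap the FE-space axes for the assembled
#  row/col PRanges, roughly `change_axes(alloc,(rows,cols))`.)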
function change_axes(a::Algebra.AllocationCOO{T,A}, axes::A) where {T,A} - counter=change_axes(a.counter,axes) + counter = change_axes(a.counter,axes) Algebra.AllocationCOO(counter,a.I,a.J,a.V) end @@ -112,8 +112,8 @@ function local_views(a) @abstractmethod end -function get_parts(x) - return linear_indices(local_views(x)) +function get_parts(a) + return linear_indices(local_views(a)) end function local_views(a::AbstractVector,rows) @@ -124,10 +124,6 @@ function local_views(a::AbstractMatrix,rows,cols) @notimplemented end -function consistent_local_views(a,ids,isconsistent) - @abstractmethod -end - function local_views(a::AbstractArray) a end @@ -144,6 +140,32 @@ function local_views(a::PSparseMatrix) partition(a) end +# change_ghost + +function change_ghost(a::PVector{T},ids::PRange;is_consistent=false,make_consistent=false) where T + same_partition = (a.index_partition === partition(ids)) + a_new = same_partition ? a : change_ghost(T,a,ids) + if make_consistent && (!same_partition || !is_consistent) + consistent!(a_new) |> wait + end + return a_new +end + +function change_ghost(::Type{<:AbstractVector},a::PVector,ids::PRange) + a_new = similar(a,eltype(a),(ids,)) + # Equivalent to copy!(a_new,a) but does not check that owned indices match + map(copy!,own_values(a_new),own_values(a)) + return a_new +end + +function change_ghost(::Type{<:OwnAndGhostVectors},a::PVector,ids::PRange) + values = map(own_values(a),partition(ids)) do own_vals,ids + ghost_vals = fill(zero(eltype(a)),ghost_length(ids)) + OwnAndGhostVectors(own_vals,ghost_vals,ids) + end + return PVector(values,partition(ids)) +end + # This function computes a mapping among the local identifiers of a and b # for which the corresponding global identifiers are both in a and b. # Note that the haskey check is necessary because in the general case @@ -235,36 +257,10 @@ function local_views(row_col_partitioned_matrix::PSparseMatrix, end end -function change_ghost(a::PVector,ids_fespace::PRange) - if a.index_partition === partition(ids_fespace) - a_fespace = a - else - a_fespace = similar(a,eltype(a),(ids_fespace,)) - a_fespace .= a - end - a_fespace -end - -function consistent_local_views(a::PVector, - ids_fespace::PRange, - isconsistent) - a_fespace = change_ghost(a,ids_fespace) - if ! 
isconsistent - fetch_vector_ghost_values!(partition(a_fespace), - map(reverse,a_fespace.cache)) |> wait - end - partition(a_fespace) +function Algebra.allocate_vector(::Type{<:PVector{T}},ids::PRange) where {T} + PVector{T}(undef,partition(ids)) end -function Algebra.allocate_vector(::Type{<:PVector{V,A}},ids::PRange) where {V,A} - values = map(partition(ids)) do ids - Tv = eltype(A) - Tv(undef,length(local_to_owner(ids))) - end - PVector(values,partition(ids)) -end - - # PSparseMatrix assembly struct FullyAssembledRows end From d42b6f74e54c92d978034d3a2bf840b2a19cc645 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Fri, 25 Aug 2023 18:56:26 +1000 Subject: [PATCH 29/56] Changes to MultiField - We now save block PRanges for reuse later - Modified constructors for SparseMatrixAssemblers --- src/FESpaces.jl | 92 +++++++++++++++++++++++----------------------- src/MultiField.jl | 93 +++++++++++++++++++++++++---------------------- 2 files changed, 95 insertions(+), 90 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 9f52a492..858d843e 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -119,11 +119,15 @@ end function fetch_vector_ghost_values_cache(vector_partition,partition) cache = PArrays.p_vector_cache(vector_partition,partition) map(reverse,cache) -end +end function fetch_vector_ghost_values!(vector_partition,cache) assemble!((a,b)->b, vector_partition, cache) -end +end + +function change_ghost(a::PVector,f::DistributedFESpace) + change_ghost(a,f.gids) +end function generate_gids( cell_range::PRange, @@ -325,18 +329,18 @@ function FESpaces.EvaluationFunction( end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector,isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals) + f::DistributedSingleFieldFESpace,x::AbstractVector,isconsistent=false) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values)) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector, + f::DistributedSingleFieldFESpace,x::AbstractVector, dirichlet_values::AbstractArray{<:AbstractVector},isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals,dirichlet_values) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values),dirichlet_values) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end @@ -485,24 +489,16 @@ function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function _find_vector_type(spaces,gids) - # TODO Now the user can select the local vector type but not the global one +function _find_vector_type(spaces,gids;own_and_ghost=false) + # TODO: Now the user can select the local vector type but not the global one # new kw-arg global_vector_type ? 
- # we use PVector for the moment local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) - - if local_vector_type <: BlockVector - T = eltype(local_vector_type) - A = typeof(map(i->Vector{T}(undef,0),partition(gids))) - B = typeof(gids) - vector_type = PVector{T,A,B} - else - T = eltype(local_vector_type) - A = typeof(map(i->local_vector_type(undef,0),partition(gids))) - B = typeof(gids) - vector_type = PVector{T,A,B} + Tv = eltype(local_vector_type) + T = Vector{Tv} + if own_and_ghost + T = OwnAndGhostVectors{T} end - + vector_type = typeof(PVector{T}(undef,partition(gids))) return vector_type end @@ -651,6 +647,27 @@ function local_assembly_strategy(::FullyAssembledRows,test_space_indices,trial_s end # Assembler high level constructors +function FESpaces.SparseMatrixAssembler( + local_mat_type, + local_vec_type, + rows::PRange, + cols::PRange, + par_strategy=SubAssembledRows()) + + assems = map(partition(rows),partition(cols)) do rows,cols + local_strategy = local_assembly_strategy(par_strategy,rows,cols) + FESpaces.GenericSparseMatrixAssembler(SparseMatrixBuilder(local_mat_type), + ArrayBuilder(local_vec_type), + Base.OneTo(length(rows)), + Base.OneTo(length(cols)), + local_strategy) + end + + mat_builder = PSparseMatrixBuilderCOO(local_mat_type,par_strategy) + vec_builder = PVectorBuilder(local_vec_type,par_strategy) + return DistributedSparseMatrixAssembler(par_strategy,assems,mat_builder,vec_builder,rows,cols) +end + function FESpaces.SparseMatrixAssembler( local_mat_type, local_vec_type, @@ -658,25 +675,9 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = local_vec_type - T = eltype(Tv) - Tm = local_mat_type - trial_dofs_gids_partition = partition(trial.gids) - test_dofs_gids_partition = partition(test.gids) - assems = map(local_views(test),local_views(trial),test_dofs_gids_partition,trial_dofs_gids_partition) do v,u,trial_gids_partition,test_gids_partition - local_strategy = local_assembly_strategy(par_strategy,trial_gids_partition,test_gids_partition) - SparseMatrixAssembler(Tm,Tv,u,v,local_strategy) - end - matrix_builder = PSparseMatrixBuilderCOO(Tm,par_strategy) - vector_builder = PVectorBuilder(Tv,par_strategy) - test_dofs_gids_prange = get_free_dof_ids(test) - trial_dofs_gids_prange = get_free_dof_ids(trial) - DistributedSparseMatrixAssembler(par_strategy, - assems, - matrix_builder, - vector_builder, - test_dofs_gids_prange, - trial_dofs_gids_prange) + rows = get_free_dof_ids(test) + cols = get_free_dof_ids(trial) + SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) end function FESpaces.SparseMatrixAssembler( @@ -684,11 +685,8 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = typeof(Int) - map(local_views(trial)) do trial - Tv = get_vector_type(trial) - end - T = eltype(Tv) + Tv = PartitionedArrays.getany(map(get_vector_type,local_views(trial))) + T = eltype(Tv) Tm = SparseMatrixCSC{T,Int} SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy) end diff --git a/src/MultiField.jl b/src/MultiField.jl index a4ae661e..e7858d00 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -29,21 +29,24 @@ local_views(a::Vector{<:DistributedCellField}) = [ai.fields for ai in a] """ """ -struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace +struct DistributedMultiFieldFESpace{MS,A,B,C,D,E} <: DistributedFESpace multi_field_style::MS field_fe_space::A part_fe_space::B gids::C - vector_type::Type{D} + block_gids::D + 
vector_type::Type{E} function DistributedMultiFieldFESpace( field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace}, part_fe_space::AbstractArray{<:MultiFieldFESpace{MS}}, gids::PRange, - vector_type::Type{D}) where {D,MS} + block_gids, + vector_type::Type{E}) where {E,MS} A = typeof(field_fe_space) B = typeof(part_fe_space) C = typeof(gids) - new{MS,A,B,C,D}(MS(),field_fe_space,part_fe_space,gids,vector_type) + D = typeof(block_gids) + new{MS,A,B,C,D,E}(MS(),field_fe_space,part_fe_space,gids,block_gids,vector_type) end end @@ -85,16 +88,22 @@ function MultiField.restrict_to_field( PVector(values,partition(gids)) end +function change_ghost(x::BlockVector, + X::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} + array = map(X.block_gids,blocks(x)) do gids, xi + change_ghost(xi,gids) + end + return mortar(array) +end + #function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}) # return mortar(map(zero_free_values,f.field_fe_space)) #end function FESpaces.FEFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - part_fe_fun = map(FEFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(FEFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -107,10 +116,8 @@ end function FESpaces.EvaluationFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,false) - part_fe_fun = map(EvaluationFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(EvaluationFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -127,8 +134,10 @@ function FESpaces.interpolate(objects,fe::DistributedMultiFieldFESpace) end function FESpaces.interpolate!(objects,free_values::AbstractVector,fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + msg = "free_values and fe have incompatible index partitions." 
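  # NOTE: interpolate! writes directly into `free_values`, so the check below
  # requires its index partition to be the *same object* as the space's dof
  # partition -- an equal-but-distinct partition is rejected, since ghost
  # layout and communication caches are tied to object identity. A caller
  # holding a mismatched vector can presumably re-partition it first with
  # `change_ghost(x, fe.gids)`.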
+ @check partition(axes(free_values,1)) === partition(fe.gids) msg + + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -143,8 +152,7 @@ end function FESpaces.interpolate_everywhere(objects,fe::DistributedMultiFieldFESpace) free_values = zero_free_values(fe) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -162,8 +170,10 @@ function FESpaces.interpolate_everywhere!( objects,free_values::AbstractVector, dirichlet_values::Vector{AbstractArray{<:AbstractVector}}, fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + msg = "free_values and fe have incompatible index partitions." + @check partition(axes(free_values,1)) === partition(fe.gids) msg + + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -179,7 +189,7 @@ end function FESpaces.interpolate_everywhere( objects::Vector{<:DistributedCellField},fe::DistributedMultiFieldFESpace) - local_objects = local_views(objects) + local_objects = map(local_views,objects) local_spaces = local_views(fe) part_fe_fun = map(local_spaces,local_objects...) do f,o... interpolate_everywhere(o,f) @@ -249,10 +259,11 @@ function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace,objects) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) + p_mspace = map(MultiFieldFESpace,p_f_space) gids = a.gids + block_gids = a.block_gids vector_type = a.vector_type - DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) + DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,block_gids,vector_type) end # Factory @@ -266,7 +277,11 @@ function MultiField.MultiFieldFESpace( p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) gids = generate_multi_field_gids(f_dspace,p_mspace) vector_type = _find_vector_type(p_mspace,gids) - DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) + + style = MultiFieldStyle(PartitionedArrays.getany(p_mspace)) + block_gids = _generate_block_gids(style,f_dspace) + + DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,block_gids,vector_type) end function generate_multi_field_gids( @@ -402,6 +417,16 @@ function propagate_to_ghost_multifield!( end end +_generate_block_gids(::MultiFieldStyle,f_dspace) = nothing + +function _generate_block_gids(::BlockMultiFieldStyle{NB,SB,P},f_dspace) where {NB,SB,P} + block_ranges = MultiField.get_block_ranges(NB,SB,P) + block_gids = map(block_ranges) do range + space = (length(range) == 1) ? f_dspace[range[1]] : MultiFieldFESpace(f_dspace[range]) + get_free_dof_ids(space) + end + return block_gids +end # BlockSparseMatrixAssemblers @@ -415,34 +440,16 @@ function FESpaces.SparseMatrixAssembler( test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, par_strategy=SubAssembledRows()) where {NB,SB,P} - # Build block spaces - function get_block_fespace(spaces,range) - (length(range) == 1) ? 
spaces[range[1]] : MultiFieldFESpace(spaces[range]) - end - block_ranges = MultiField.get_block_ranges(NB,SB,P) - block_tests = map(range -> get_block_fespace(test.field_fe_space,range),block_ranges) - block_trials = map(range -> get_block_fespace(trial.field_fe_space,range),block_ranges) - block_idx = CartesianIndices((NB,NB)) block_assemblers = map(block_idx) do idx - Yi = block_tests[idx[1]]; Xj = block_trials[idx[2]] - return SparseMatrixAssembler(local_mat_type,local_vec_type,Xj,Yi,par_strategy) + rows = test.block_gids[idx[1]]; cols = trial.block_gids[idx[2]] + return SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) end - NV = length(trial.field_fe_space) + NV = length(P) return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers) end -function FESpaces.SparseMatrixAssembler( - trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}, - test ::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}, - par_strategy=SubAssembledRows()) - Tv = get_vector_type(PartitionedArrays.getany(local_views(first(trial)))) - T = eltype(Tv) - Tm = SparseMatrixCSC{T,Int} - SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy) -end - # Array of PArrays -> PArray of Arrays function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) indices = linear_indices(first(a)) From 661ee0f03fd91452f62f3f444db684e099756125 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Fri, 25 Aug 2023 18:58:48 +1000 Subject: [PATCH 30/56] Update tests --- test/BlockSparseMatrixAssemblersTests.jl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index 400b0183..c93ad0e8 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -18,17 +18,6 @@ function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector) end end -function GridapDistributed.change_ghost( - x::BlockVector, - X::GridapDistributed.DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} - block_ranges = MultiField.get_block_ranges(NB,SB,P) - array = map(block_ranges,blocks(x)) do range, xi - Xi = (length(range) == 1) ? 
X.field_fe_space[range[1]] : MultiFieldFESpace(X.field_fe_space[range]) - GridapDistributed.change_ghost(xi,Xi.gids) - end - return mortar(array) -end - function is_same_vector(x::BlockVector,y::PVector,Ub,U) y_fespace = GridapDistributed.change_ghost(y,U.gids) x_fespace = GridapDistributed.change_ghost(x,Ub) From bdb6b6227db6872415bc95b56d42529c5c1b9584 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Fri, 25 Aug 2023 19:04:30 +1000 Subject: [PATCH 31/56] Move to_parray_of_arrays to Algebra.jl --- src/Algebra.jl | 19 +++++++++++++++++++ src/MultiField.jl | 19 ------------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 58651237..c1f0ff79 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -16,6 +16,25 @@ function change_axes(a::Algebra.AllocationCOO{T,A}, axes::A) where {T,A} Algebra.AllocationCOO(counter,a.I,a.J,a.V) end +# Array of PArrays -> PArray of Arrays +function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + PartitionedArrays.getany(aj) + end + end +end + +function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + aj.items[i] + end + end +end + # This type is required because MPIArray from PArrays # cannot be instantiated with a NULL communicator struct MPIVoidVector{T} <: AbstractVector{T} diff --git a/src/MultiField.jl b/src/MultiField.jl index e7858d00..44a82999 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -450,25 +450,6 @@ function FESpaces.SparseMatrixAssembler( return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers) end -# Array of PArrays -> PArray of Arrays -function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) - indices = linear_indices(first(a)) - map(indices) do i - map(a) do aj - PartitionedArrays.getany(aj) - end - end -end - -function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) - indices = linear_indices(first(a)) - map(indices) do i - map(a) do aj - aj.items[i] - end - end -end - function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} assems = a.block_assemblers array = to_parray_of_arrays(map(local_views,assems)) From 9787cce8119b7884e657ff410fa61d08a4409b9a Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 28 Aug 2023 11:26:35 +1000 Subject: [PATCH 32/56] Bugfix: change_ghosts wrong for OwnAndGhostVectors --- src/Algebra.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index c1f0ff79..f884e56d 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -180,7 +180,8 @@ end function change_ghost(::Type{<:OwnAndGhostVectors},a::PVector,ids::PRange) values = map(own_values(a),partition(ids)) do own_vals,ids ghost_vals = fill(zero(eltype(a)),ghost_length(ids)) - OwnAndGhostVectors(own_vals,ghost_vals,ids) + perm = PartitionedArrays.local_permutation(ids) + OwnAndGhostVectors(own_vals,ghost_vals,perm) end return PVector(values,partition(ids)) end From 9c674fd071e3d0f608fea7104e490c5073eaca12 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 28 Aug 2023 11:28:14 +1000 Subject: [PATCH 33/56] Added kwarg own_and_ghost for FESpaces --- src/FESpaces.jl | 26 ++++++++------------------ src/MultiField.jl | 4 ++-- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 858d843e..0c2fc2bd 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -177,7 +177,6 @@ function generate_gids( 
cell_to_ldofs, cell_range) - # Find the global range of owned dofs first_gdof = scan(+,nodofs,type=:exclusive,init=one(eltype(nodofs))) @@ -467,16 +466,16 @@ end # Factories -function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;kwargs...) +function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;own_and_ghost=false,kwargs...) spaces = map(local_views(model)) do m FESpace(m,reffe;kwargs...) end gids = generate_gids(model,spaces) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;own_and_ghost=own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) +function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;own_and_ghost=false,kwargs...) trian = add_ghost_cells(_trian) trian_gids = generate_cell_gids(trian) spaces = map(trian.trians) do t @@ -485,13 +484,11 @@ function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) cell_to_ldofs = map(get_cell_dof_ids,spaces) nldofs = map(num_free_dofs,spaces) gids = generate_gids(trian_gids,cell_to_ldofs,nldofs) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;own_and_ghost=own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end function _find_vector_type(spaces,gids;own_and_ghost=false) - # TODO: Now the user can select the local vector type but not the global one - # new kw-arg global_vector_type ? local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) Tv = eltype(local_vector_type) T = Vector{Tv} @@ -508,17 +505,12 @@ function FESpaces.collect_cell_matrix( trial::DistributedFESpace, test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_matrix, - local_views(trial), - local_views(test), - local_views(a)) + map(collect_cell_matrix,local_views(trial),local_views(test),local_views(a)) end function FESpaces.collect_cell_vector( test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_vector,local_views(test),local_views(a)) + map(collect_cell_vector,local_views(test),local_views(a)) end function FESpaces.collect_cell_matrix_and_vector( @@ -559,8 +551,7 @@ function FESpaces.collect_cell_matrix_and_vector( test::DistributedFESpace, mat::DistributedDomainContribution, l::Number) - map( - local_views(trial),local_views(test),local_views(mat)) do u,v,m + map(local_views(trial),local_views(test),local_views(mat)) do u,v,m collect_cell_matrix_and_vector(u,v,m,l) end end @@ -571,8 +562,7 @@ function FESpaces.collect_cell_matrix_and_vector( mat::DistributedDomainContribution, l::Number, uhd) - map( - local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f + map(local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f collect_cell_matrix_and_vector(u,v,m,l,f) end end diff --git a/src/MultiField.jl b/src/MultiField.jl index 44a82999..3027082e 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -269,14 +269,14 @@ end # Factory function MultiField.MultiFieldFESpace( - f_dspace::Vector{<:DistributedSingleFieldFESpace};kwargs...) + f_dspace::Vector{<:DistributedSingleFieldFESpace};own_and_ghost=false, kwargs...) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) p_f_space = map(v,f_p_space...) 
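  # NOTE: the splatted map above transposes the nesting -- a Vector of
  # distributed objects (one per field, each holding that field's local
  # spaces) becomes a single distributed object whose entry on every part is
  # the Vector of that part's local spaces, so a plain MultiFieldFESpace can
  # be built part-wise on the next line.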
p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) gids = generate_multi_field_gids(f_dspace,p_mspace) - vector_type = _find_vector_type(p_mspace,gids) + vector_type = _find_vector_type(p_mspace,gids;own_and_ghost=own_and_ghost) style = MultiFieldStyle(PartitionedArrays.getany(p_mspace)) block_gids = _generate_block_gids(style,f_dspace) From 580266d0146513d391d7122cf09d15ad34b4bea8 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 12:53:12 +1000 Subject: [PATCH 34/56] Implemented BlockPartitionedArrays --- src/Algebra.jl | 2 - src/BlockPartitionedArrays.jl | 274 ++++++++++++++++++++++++++++++++++ src/GridapDistributed.jl | 3 + 3 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 src/BlockPartitionedArrays.jl diff --git a/src/Algebra.jl b/src/Algebra.jl index f884e56d..37837857 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -1009,8 +1009,6 @@ function assemble_coo_with_column_owner!(I,J,V,row_partition,Jown) end end -Base.wait(t::Matrix) = map(wait,t) - # dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange # In the former case, gids is a vector of global test dof identifiers, while in the # latter, a vector of global trial dof identifiers diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl new file mode 100644 index 00000000..969a5f3c --- /dev/null +++ b/src/BlockPartitionedArrays.jl @@ -0,0 +1,274 @@ + +""" +""" +struct BlockPRange{A} <: AbstractUnitRange{Int} + ranges::Vector{PRange{A}} + function BlockPRange(ranges::Vector{<:PRange{A}}) where A + new{A}(ranges) + end +end + +Base.first(a::BlockPRange) = 1 +Base.last(a::BlockPRange) = sum(map(last,a.ranges)) + +BlockArrays.blocklength(a::BlockPRange) = length(a.ranges) +BlockArrays.blocksize(a::BlockPRange) = (blocklength(a),) +BlockArrays.blockaxes(a::BlockPRange) = (Block.(Base.OneTo(blocklength(a))),) +BlockArrays.blocks(a::BlockPRange) = a.ranges + +""" +""" +struct BlockPArray{T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} + blocks::Array{A,N} + axes::NTuple{N,B} + + function BlockPArray(blocks::Array{<:AbstractArray{T,N},N}, + axes ::NTuple{N,<:BlockPRange}) where {T,N} + @check all(map(d->size(blocks,d)==blocklength(axes[d]),1:N)) + A = eltype(blocks) + B = typeof(first(axes)) + new{T,N,A,B}(blocks,axes) + end +end + +const BlockPVector{T,A,B} = BlockPArray{T,1,A,B} +const BlockPMatrix{T,A,B} = BlockPArray{T,2,A,B} + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::BlockPRange) + BlockPArray(blocks,(rows,)) +end + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::Vector{<:PRange}) + BlockPVector(blocks,BlockPRange(rows)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::BlockPRange,cols::BlockPRange) + BlockPArray(blocks,(rows,cols)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::Vector{<:PRange},cols::Vector{<:PRange}) + BlockPMatrix(blocks,BlockPRange(rows),BlockPRange(cols)) +end + +# AbstractArray API + +Base.axes(a::BlockPArray) = a.axes +Base.size(a::BlockPArray) = Tuple(map(length,a.axes)) + +Base.IndexStyle(::Type{<:BlockPVector}) = IndexLinear() +Base.IndexStyle(::Type{<:BlockPMatrix}) = IndexCartesian() + +function Base.similar(a::BlockPVector,::Type{T},inds::Tuple{<:BlockPRange}) where T + vals = map(blocks(a),blocks(inds[1])) do ai,i + similar(ai,T,i) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPVector{T,A}},inds::Tuple{<:BlockPRange}) where {T,A} + rows = blocks(inds[1]) + values = map(rows) do r + return 
similar(A,(r,)) + end + return BlockPArray(values,inds) +end + +function Base.similar(a::BlockPMatrix,::Type{T},inds::Tuple{<:BlockPRange,<:BlockPRange}) where T + vals = map(CartesianIndices(blocksize(a))) do I + rows = inds[1].ranges[I[1]] + cols = inds[2].ranges[I[2]] + similar(a.blocks[I],T,(rows,cols)) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPMatrix{T,A}},inds::Tuple{<:BlockPRange,<:BlockPRange}) where {T,A} + rows = blocks(inds[1]) + cols = blocks(inds[2]) + values = map(CartesianIndices((length(rows),length(cols)))) do I + i,j = I[1],I[2] + return similar(A,(rows[i],cols[j])) + end + return BlockPArray(values,inds) +end + +function Base.getindex(a::BlockPArray{T,N},inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end +function Base.setindex(a::BlockPArray{T,N},v,inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end + +function Base.show(io::IO,k::MIME"text/plain",data::BlockPArray{T,N}) where {T,N} + v = first(blocks(data)) + s = prod(map(si->"$(si)x",blocksize(data)))[1:end-1] + map_main(partition(v)) do values + println(io,"$s-block BlockPArray{$T,$N}") + end +end + +function Base.zero(v::BlockPArray) + return mortar(map(zero,blocks(v))) +end + +function Base.copyto!(y::BlockPVector,x::BlockPVector) + @check blocklength(x) == blocklength(y) + for i in blockaxes(x,1) + copyto!(y[i],x[i]) + end + return y +end + +function Base.copyto!(y::BlockPMatrix,x::BlockPMatrix) + @check blocksize(x) == blocksize(y) + for i in blockaxes(x,1) + for j in blockaxes(x,2) + copyto!(y[i,j],x[i,j]) + end + end + return y +end + +function Base.fill!(a::BlockPVector,v) + map(blocks(a)) do a + fill!(a,v) + end + return a +end + +# AbstractBlockArray API + +BlockArrays.blocks(a::BlockPArray) = a.blocks + +function Base.getindex(a::BlockPArray,inds::Block{1}) + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{T,N},inds::Block{N}) where {T,N} + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{T,N},inds::Vararg{Block{1},N}) where {T,N} + a.blocks[map(i->i.n[1],inds)...] 
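  # NOTE: the getindex methods above give BlockPArray the standard BlockArrays
  # addressing styles, e.g. `m[Block(1,1)]` and `m[Block(1),Block(1)]` both
  # return the (1,1) sub-block without touching scalar entries (scalar
  # indexing is deliberately disallowed for distributed storage; also,
  # `Base.setindex` further up was presumably meant to be `Base.setindex!`).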
+end + +function BlockArrays.mortar(blocks::Vector{<:PVector}) + rows = map(b->axes(b,1),blocks) + BlockPVector(blocks,rows) +end + +function BlockArrays.mortar(blocks::Matrix{<:PSparseMatrix}) + rows = map(b->axes(b,1),blocks[:,1]) + cols = map(b->axes(b,2),blocks[1,:]) + + function check_axes(a,r,c) + A = matching_local_indices(axes(a,1),r) + B = matching_local_indices(axes(a,2),c) + return A & B + end + @check all(map(I -> check_axes(blocks[I],rows[I[1]],cols[I[2]]),CartesianIndices(size(blocks)))) + + return BlockPMatrix(blocks,rows,cols) +end + +# PartitionedArrays API + +Base.wait(t::Array) = map(wait,t) +Base.fetch(t::Array) = map(fetch,t) + +function PartitionedArrays.assemble!(a::BlockPArray) + map(assemble!,blocks(a)) +end + +function PartitionedArrays.consistent!(a::BlockPArray) + map(consistent!,blocks(a)) +end + +function PartitionedArrays.partition(a::BlockPArray) + return map(partition,blocks(a)) |> to_parray_of_arrays +end + +function PartitionedArrays.to_trivial_partition(a::BlockPArray) + vals = map(to_trivial_partition,blocks(a)) + return mortar(vals) +end + +# LinearAlgebra API + +function LinearAlgebra.mul!(y::BlockPVector,A::BlockPMatrix,x::BlockPVector) + o = one(eltype(A)) + for i in blockaxes(A,2) + fill!(y[i],0.0) + for j in blockaxes(A,2) + mul!(y[i],A[i,j],x[j],o,o) + end + end +end + +function LinearAlgebra.dot(x::BlockPVector,y::BlockPVector) + return sum(map(dot,blocks(x),blocks(y))) +end + +function LinearAlgebra.norm(v::BlockPVector) + block_norms = map(norm,blocks(v)) + return sqrt(sum(block_norms.^2)) +end + +function LinearAlgebra.fillstored!(a::BlockPMatrix,v) + map(blocks(a)) do a + fillstored!(a,v) + end + return a +end + +# Broadcasting + +struct BlockPBroadcasted{A,B} + blocks :: A + axes :: B +end + +BlockArrays.blocks(b::BlockPBroadcasted) = b.blocks +BlockArrays.blockaxes(b::BlockPBroadcasted) = b.axes + +function Base.broadcasted(f, args::Union{BlockPVector,BlockPBroadcasted}...) + a1 = first(args) + @boundscheck @assert all(ai -> blockaxes(ai) == blockaxes(a1),args) + + blocks_in = map(blocks,args) + blocks_out = map((largs...)->Base.broadcasted(f,largs...),blocks_in...) 
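  # NOTE: broadcasting stays lazy block-by-block -- each block keeps its own
  # Base.Broadcast tree and nothing is evaluated until materialize /
  # materialize! (defined further down) map over the blocks, so expressions
  # like `x .+ 2 .* y` fuse within every block.
  #
  # Also note that the outer loop of mul! in the LinearAlgebra section above
  # iterates blockaxes(A,2); this only covers square block layouts and was
  # presumably meant to be blockaxes(A,1).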
+ + return BlockPBroadcasted(blocks_out,blockaxes(a1)) +end + +function Base.broadcasted(f, a::Number, b::Union{BlockPVector,BlockPBroadcasted}) + blocks_out = map(b->Base.broadcasted(f,a,b),blocks(b)) + return BlockPBroadcasted(blocks_out,blockaxes(b)) +end + +function Base.broadcasted(f, a::Union{BlockPVector,BlockPBroadcasted}, b::Number) + blocks_out = map(a->Base.broadcasted(f,a,b),blocks(a)) + return BlockPBroadcasted(blocks_out,blockaxes(a)) +end + +function Base.broadcasted(f, + a::Union{BlockPVector,BlockPBroadcasted}, + b::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}) + Base.broadcasted(f,a,Base.materialize(b)) +end + +function Base.broadcasted( + f, + a::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}, + b::Union{BlockPVector,BlockPBroadcasted}) + Base.broadcasted(f,Base.materialize(a),b) +end + +function Base.materialize(b::BlockPBroadcasted) + blocks_out = map(Base.materialize,blocks(b)) + return mortar(blocks_out) +end + +function Base.materialize!(a::BlockPVector,b::BlockPBroadcasted) + map(Base.materialize!,blocks(a),blocks(b)) + return a +end + diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index 6df3dd25..56095155 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -23,6 +23,7 @@ using SparseArrays using WriteVTK using FillArrays using BlockArrays +using LinearAlgebra import Gridap.TensorValues: inner, outer, double_contraction, symmetric_part import LinearAlgebra: det, tr, cross, dot, ⋅, diag @@ -41,6 +42,8 @@ export with_ghost, no_ghost include("Algebra.jl") +include("BlockPartitionedArrays.jl") + include("Geometry.jl") include("CellData.jl") From 35579bb768459f5c613dfbda5c435bff48aec919 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 12:53:44 +1000 Subject: [PATCH 35/56] Added tests for BlockPartitionedArrays --- test/BlockPartitionedArraysTests.jl | 89 +++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 test/BlockPartitionedArraysTests.jl diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl new file mode 100644 index 00000000..7d1941e3 --- /dev/null +++ b/test/BlockPartitionedArraysTests.jl @@ -0,0 +1,89 @@ + +using Test +using Gridap +using PartitionedArrays +using GridapDistributed +using BlockArrays +using SparseArrays +using LinearAlgebra + +using GridapDistributed: BlockPArray, BlockPVector, BlockPMatrix, BlockPRange + + +ranks = with_debug() do distribute + distribute(LinearIndices((2,))) +end + +indices = map(ranks) do r + if r == 1 + own_gids = [1,2,3,4,5] + ghost_gids = [6,7] + ghost_owners = [2,2] + else + own_gids = [6,7,8,9,10] + ghost_gids = [5] + ghost_owners = [1] + end + own_idx = OwnIndices(10,r,own_gids) + ghost_idx = GhostIndices(10,ghost_gids,ghost_owners) + OwnAndGhostIndices(own_idx,ghost_idx) +end + +block_range = BlockPRange([PRange(indices),PRange(indices)]) + +_v = PVector{OwnAndGhostVectors{Vector{Float64}}}(undef,indices) +v = BlockPArray([_v,_v],(block_range,)) + +_m = map(CartesianIndices((2,2))) do I + i,j = I[1],I[2] + local_mats = map(ranks,indices) do r, ind + n = local_length(ind) + if i==j && r == 1 + SparseMatrixCSC(n,n,Int[1,3,5,7,9,10,11,13],Int[1,2,2,3,3,4,4,5,5,6,6,7],fill(1.0,12)) + elseif i==j && r == 2 + SparseMatrixCSC(n,n,Int[1,2,4,6,8,10,11],Int[1,1,2,2,3,3,4,4,5,6],fill(1.0,10)) + else + SparseMatrixCSC(n,n,fill(Int(1),n+1),Int[],Float64.([])) + end + end + PSparseMatrix(local_mats,indices,indices) +end +m = BlockPArray(_m,(block_range,block_range)) + +x = similar(_v) 
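# NOTE: at this point `v` is a 2-block vector whose blocks share a single
# 10-dof PRange (5 owned dofs per rank plus the ghosts listed above), and
# `m` is the matching 2x2 array of PSparseMatrix blocks; the single-block
# mul! below is a sanity check before the blocked operations tested later.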
+mul!(x,_m[1,1],_v) + +# BlockPRange + +@test blocklength(block_range) == 2 +@test blocksize(block_range) == (2,) + +# BlockPArray + +__v = similar(v,block_range) +__m = similar(m,(block_range,block_range)) +fill!(v,1.0) + +__v = __v .+ 1.0 +__v = __v .- 1.0 +__v = __v .* 1.0 +__v = __v ./ 1.0 + +__m = __m .+ 1.0 +__m = __m .- 1.0 +__m = __m .* 1.0 +__m = __m ./ 1.0 + +# LinearAlgebra + +x = similar(v) +mul!(x,m,v) +consistent!(x) |> fetch +partition(x) + +dot(v,x) +norm(v) +copy!(x,v) + +LinearAlgebra.fillstored!(__m,1.0) + From cf59ff2c02d6e39d3a4c7d80c45432132c66a748 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 14:29:39 +1000 Subject: [PATCH 36/56] Updated block assembly to use BlockPartitionedArrays --- src/Algebra.jl | 7 +++++++ src/BlockPartitionedArrays.jl | 7 ++++--- src/MultiField.jl | 20 +++----------------- test/BlockSparseMatrixAssemblersTests.jl | 19 +++++-------------- 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 37837857..dc600124 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -186,6 +186,13 @@ function change_ghost(::Type{<:OwnAndGhostVectors},a::PVector,ids::PRange) return PVector(values,partition(ids)) end +function change_ghost(a::BlockPVector,ids::BlockPRange;is_consistent=false,make_consistent=false) + vals = map(blocks(a),blocks(ids)) do a, ids + change_ghost(a,ids;is_consistent=is_consistent,make_consistent=make_consistent) + end + return BlockPVector(vals,ids) +end + # This function computes a mapping among the local identifiers of a and b # for which the corresponding global identifiers are both in a and b. # Note that the haskey check is necessary because in the general case diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 969a5f3c..2b6ad46a 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -160,8 +160,8 @@ function BlockArrays.mortar(blocks::Matrix{<:PSparseMatrix}) cols = map(b->axes(b,2),blocks[1,:]) function check_axes(a,r,c) - A = matching_local_indices(axes(a,1),r) - B = matching_local_indices(axes(a,2),c) + A = PartitionedArrays.matching_local_indices(axes(a,1),r) + B = PartitionedArrays.matching_local_indices(axes(a,2),c) return A & B end @check all(map(I -> check_axes(blocks[I],rows[I[1]],cols[I[2]]),CartesianIndices(size(blocks)))) @@ -183,7 +183,8 @@ function PartitionedArrays.consistent!(a::BlockPArray) end function PartitionedArrays.partition(a::BlockPArray) - return map(partition,blocks(a)) |> to_parray_of_arrays + vals = map(partition,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) end function PartitionedArrays.to_trivial_partition(a::BlockPArray) diff --git a/src/MultiField.jl b/src/MultiField.jl index 3027082e..85f4509d 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -66,7 +66,7 @@ function FESpaces.get_free_dof_ids(fs::DistributedMultiFieldFESpace) end function MultiField.restrict_to_field( - f::DistributedMultiFieldFESpace,free_values::PVector,field::Integer) + f::DistributedMultiFieldFESpace,free_values::AbstractVector,field::Integer) values = map(f.part_fe_space,partition(free_values)) do u,x restrict_to_field(u,x,field) end @@ -74,26 +74,12 @@ function MultiField.restrict_to_field( PVector(values,partition(gids)) end -function MultiField.restrict_to_field( - f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle},free_values::BlockVector,field::Integer) - - # BlockVector{PVector} -> PVector{BlockVector} - fv1 = map(partition,blocks(free_values)) |> to_parray_of_arrays - fv2 = 
map(mortar,fv1) - - values = map(f.part_fe_space,fv2) do u,x - restrict_to_field(u,x,field) - end - gids = f.field_fe_space[field].gids - PVector(values,partition(gids)) -end - -function change_ghost(x::BlockVector, +function change_ghost(x::BlockPVector, X::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} array = map(X.block_gids,blocks(x)) do gids, xi change_ghost(xi,gids) end - return mortar(array) + return BlockPVector(array,X.block_gids) end #function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}) diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl index c93ad0e8..4352af83 100644 --- a/test/BlockSparseMatrixAssemblersTests.jl +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -7,18 +7,9 @@ using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField using GridapDistributed using PartitionedArrays +using GridapDistributed: BlockPVector, BlockPMatrix -function LinearAlgebra.mul!(y::BlockVector,A::BlockMatrix,x::BlockVector) - o = one(eltype(A)) - for i in blockaxes(A,2) - fill!(y[i],0.0) - for j in blockaxes(A,2) - mul!(y[i],A[i,j],x[j],o,o) - end - end -end - -function is_same_vector(x::BlockVector,y::PVector,Ub,U) +function is_same_vector(x::BlockPVector,y::PVector,Ub,U) y_fespace = GridapDistributed.change_ghost(y,U.gids) x_fespace = GridapDistributed.change_ghost(x,Ub) @@ -30,13 +21,13 @@ function is_same_vector(x::BlockVector,y::PVector,Ub,U) return all(res) end -function is_same_matrix(Ab::BlockMatrix,A::PSparseMatrix,Xb,X) +function is_same_matrix(Ab::BlockPMatrix,A::PSparseMatrix,Xb,X) yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); mul!(yb,Ab,xb) - y = pfill(0.0,partition(axes(A)[1])) - x = pfill(1.0,partition(axes(A)[2])) + y = pfill(0.0,partition(axes(A,1))) + x = pfill(1.0,partition(axes(A,2))) mul!(y,A,x) return is_same_vector(yb,y,Xb,X) From 5f2923b10782cb1069696470c7b1320416bbf3de Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 14:41:33 +1000 Subject: [PATCH 37/56] Fixed import order --- src/BlockPartitionedArrays.jl | 7 ++++++- src/GridapDistributed.jl | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 2b6ad46a..dba50fc4 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -16,6 +16,10 @@ BlockArrays.blocksize(a::BlockPRange) = (blocklength(a),) BlockArrays.blockaxes(a::BlockPRange) = (Block.(Base.OneTo(blocklength(a))),) BlockArrays.blocks(a::BlockPRange) = a.ranges +function PartitionedArrays.partition(a::BlockPRange) + return map(partition,blocks(a)) |> to_parray_of_arrays +end + """ """ struct BlockPArray{T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} @@ -195,9 +199,10 @@ end # LinearAlgebra API function LinearAlgebra.mul!(y::BlockPVector,A::BlockPMatrix,x::BlockPVector) + z = zero(eltype(y)) o = one(eltype(A)) for i in blockaxes(A,2) - fill!(y[i],0.0) + fill!(y[i],z) for j in blockaxes(A,2) mul!(y[i],A[i,j],x[j],o,o) end diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index 56095155..9e985bc8 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -40,10 +40,10 @@ export get_face_gids export local_views, get_parts export with_ghost, no_ghost -include("Algebra.jl") - include("BlockPartitionedArrays.jl") +include("Algebra.jl") + include("Geometry.jl") include("CellData.jl") From 
f73f0d6da1411d32f639206dfb1dc75f16afeed7 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 19:05:16 +1000 Subject: [PATCH 38/56] BlockMultiFieldStyle now returns BlockPVector free values --- src/Algebra.jl | 8 +- src/BlockPartitionedArrays.jl | 38 +++++-- src/FESpaces.jl | 6 +- src/MultiField.jl | 67 ++++++------ test/BlockPartitionedArraysTests.jl | 5 + test/MultiFieldTests.jl | 54 ++++++---- test/StokesHdivDGTests.jl | 156 ++++++++++++++-------------- 7 files changed, 190 insertions(+), 144 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index dc600124..a405964d 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -284,8 +284,12 @@ function local_views(row_col_partitioned_matrix::PSparseMatrix, end end -function Algebra.allocate_vector(::Type{<:PVector{T}},ids::PRange) where {T} - PVector{T}(undef,partition(ids)) +function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} + PVector{V}(undef,partition(ids)) +end + +function Algebra.allocate_vector(::Type{<:BlockPVector{V}},ids::BlockPRange) where {V} + BlockPVector{V}(undef,ids) end # PSparseMatrix assembly diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index dba50fc4..29e09599 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -22,21 +22,24 @@ end """ """ -struct BlockPArray{T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} +struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} blocks::Array{A,N} axes::NTuple{N,B} function BlockPArray(blocks::Array{<:AbstractArray{T,N},N}, axes ::NTuple{N,<:BlockPRange}) where {T,N} @check all(map(d->size(blocks,d)==blocklength(axes[d]),1:N)) + local_type(::Type{<:PVector{V}}) where V = V + local_type(::Type{<:PSparseMatrix{V}}) where V = V A = eltype(blocks) B = typeof(first(axes)) - new{T,N,A,B}(blocks,axes) + V = local_type(A) + new{V,T,N,A,B}(blocks,axes) end end -const BlockPVector{T,A,B} = BlockPArray{T,1,A,B} -const BlockPMatrix{T,A,B} = BlockPArray{T,2,A,B} +const BlockPVector{V,T,A,B} = BlockPArray{V,T,1,A,B} +const BlockPMatrix{V,T,A,B} = BlockPArray{V,T,2,A,B} @inline function BlockPVector(blocks::Vector{<:PVector},rows::BlockPRange) BlockPArray(blocks,(rows,)) @@ -54,6 +57,25 @@ end BlockPMatrix(blocks,BlockPRange(rows),BlockPRange(cols)) end +function BlockPVector{V}(::UndefInitializer,rows::BlockPRange) where {V} + vals = map(blocks(rows)) do r + PVector{V}(undef,partition(r)) + end + return BlockPVector(vals,rows) +end + +function BlockPMatrix{V}(::UndefInitializer,rows::BlockPRange,cols::BlockPRange) where {V} + block_ids = CartesianIndices((blocklength(rows),blocklength(cols))) + block_rows = blocks(rows) + block_cols = blocks(cols) + vals = map(block_ids) do I + r = block_rows[I[1]] + c = block_cols[I[2]] + PSparseMatrix{V}(undef,partition(r),partition(c)) + end + return BlockPMatrix(vals,rows) +end + # AbstractArray API Base.axes(a::BlockPArray) = a.axes @@ -69,7 +91,7 @@ function Base.similar(a::BlockPVector,::Type{T},inds::Tuple{<:BlockPRange}) wher return BlockPArray(vals,inds) end -function Base.similar(::Type{<:BlockPVector{T,A}},inds::Tuple{<:BlockPRange}) where {T,A} +function Base.similar(::Type{<:BlockPVector{V,T,A}},inds::Tuple{<:BlockPRange}) where {V,T,A} rows = blocks(inds[1]) values = map(rows) do r return similar(A,(r,)) @@ -86,7 +108,7 @@ function Base.similar(a::BlockPMatrix,::Type{T},inds::Tuple{<:BlockPRange,<:Bloc return BlockPArray(vals,inds) end -function Base.similar(::Type{<:BlockPMatrix{T,A}},inds::Tuple{<:BlockPRange,<:BlockPRange}) where {T,A} 
+function Base.similar(::Type{<:BlockPMatrix{V,T,A}},inds::Tuple{<:BlockPRange,<:BlockPRange}) where {V,T,A} rows = blocks(inds[1]) cols = blocks(inds[2]) values = map(CartesianIndices((length(rows),length(cols)))) do I @@ -147,10 +169,10 @@ BlockArrays.blocks(a::BlockPArray) = a.blocks function Base.getindex(a::BlockPArray,inds::Block{1}) a.blocks[inds.n...] end -function Base.getindex(a::BlockPArray{T,N},inds::Block{N}) where {T,N} +function Base.getindex(a::BlockPArray{V,T,N},inds::Block{N}) where {V,T,N} a.blocks[inds.n...] end -function Base.getindex(a::BlockPArray{T,N},inds::Vararg{Block{1},N}) where {T,N} +function Base.getindex(a::BlockPArray{V,T,N},inds::Vararg{Block{1},N}) where {V,T,N} a.blocks[map(i->i.n[1],inds)...] end diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 0c2fc2bd..2a2a89c7 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -495,7 +495,11 @@ function _find_vector_type(spaces,gids;own_and_ghost=false) if own_and_ghost T = OwnAndGhostVectors{T} end - vector_type = typeof(PVector{T}(undef,partition(gids))) + if isa(gids,PRange) + vector_type = typeof(PVector{T}(undef,partition(gids))) + else # isa(gids,BlockPRange) + vector_type = typeof(BlockPVector{T}(undef,gids)) + end return vector_type end diff --git a/src/MultiField.jl b/src/MultiField.jl index 85f4509d..66e60d37 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -29,24 +29,21 @@ local_views(a::Vector{<:DistributedCellField}) = [ai.fields for ai in a] """ """ -struct DistributedMultiFieldFESpace{MS,A,B,C,D,E} <: DistributedFESpace +struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace multi_field_style::MS field_fe_space::A part_fe_space::B gids::C - block_gids::D - vector_type::Type{E} + vector_type::Type{D} function DistributedMultiFieldFESpace( field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace}, part_fe_space::AbstractArray{<:MultiFieldFESpace{MS}}, - gids::PRange, - block_gids, - vector_type::Type{E}) where {E,MS} + gids::Union{<:PRange,<:BlockPRange}, + vector_type::Type{D}) where {D,MS} A = typeof(field_fe_space) B = typeof(part_fe_space) C = typeof(gids) - D = typeof(block_gids) - new{MS,A,B,C,D,E}(MS(),field_fe_space,part_fe_space,gids,block_gids,vector_type) + new{MS,A,B,C,D}(MS(),field_fe_space,part_fe_space,gids,vector_type) end end @@ -76,10 +73,10 @@ end function change_ghost(x::BlockPVector, X::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} - array = map(X.block_gids,blocks(x)) do gids, xi + array = map(blocks(X.gids),blocks(x)) do gids, xi change_ghost(xi,gids) end - return BlockPVector(array,X.block_gids) + return BlockPVector(array,X.gids) end #function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}) @@ -120,9 +117,6 @@ function FESpaces.interpolate(objects,fe::DistributedMultiFieldFESpace) end function FESpaces.interpolate!(objects,free_values::AbstractVector,fe::DistributedMultiFieldFESpace) - msg = "free_values and fe have incompatible index partitions." 
- @check partition(axes(free_values,1)) === partition(fe.gids) msg - part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end @@ -239,17 +233,16 @@ function FESpaces.TrialFESpace(objects,a::DistributedMultiFieldFESpace) TrialFESpace(a,objects) end -function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace,objects) +function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace{MS},objects) where MS f_dspace_test = a.field_fe_space f_dspace = map( arg -> TrialFESpace(arg[1],arg[2]), zip(f_dspace_test,objects) ) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) + p_mspace = map(s->MultiFieldFESpace(s;style=MS()),p_f_space) gids = a.gids - block_gids = a.block_gids vector_type = a.vector_type - DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,block_gids,vector_type) + DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) end # Factory @@ -261,16 +254,14 @@ function MultiField.MultiFieldFESpace( p_f_space = map(v,f_p_space...) p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) - gids = generate_multi_field_gids(f_dspace,p_mspace) + style = PartitionedArrays.getany(map(MultiFieldStyle,p_mspace)) + gids = generate_multi_field_gids(style,f_dspace,p_mspace) vector_type = _find_vector_type(p_mspace,gids;own_and_ghost=own_and_ghost) - - style = MultiFieldStyle(PartitionedArrays.getany(p_mspace)) - block_gids = _generate_block_gids(style,f_dspace) - - DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,block_gids,vector_type) + DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) end function generate_multi_field_gids( + ::MultiFieldStyle, f_dspace::Vector{<:DistributedSingleFieldFESpace}, p_mspace::AbstractArray{<:MultiFieldFESpace}) @@ -289,6 +280,19 @@ function generate_multi_field_gids( gids = generate_multi_field_gids(f_p_flid_lid,f_frange) end +function generate_multi_field_gids( + ::BlockMultiFieldStyle{NB,SB,P}, + f_dspace::Vector{<:DistributedSingleFieldFESpace}, + p_mspace::AbstractArray{<:MultiFieldFESpace}) where {NB,SB,P} + + block_ranges = MultiField.get_block_ranges(NB,SB,P) + block_gids = map(block_ranges) do range + space = (length(range) == 1) ? f_dspace[range[1]] : MultiFieldFESpace(f_dspace[range]) + get_free_dof_ids(space) + end + return BlockPRange(block_gids) +end + function generate_multi_field_gids( f_p_flid_lid::AbstractVector{<:AbstractArray{<:AbstractVector}}, f_frange::AbstractVector{<:PRange}) @@ -403,17 +407,6 @@ function propagate_to_ghost_multifield!( end end -_generate_block_gids(::MultiFieldStyle,f_dspace) = nothing - -function _generate_block_gids(::BlockMultiFieldStyle{NB,SB,P},f_dspace) where {NB,SB,P} - block_ranges = MultiField.get_block_ranges(NB,SB,P) - block_gids = map(block_ranges) do range - space = (length(range) == 1) ? 
f_dspace[range[1]] : MultiFieldFESpace(f_dspace[range]) - get_free_dof_ids(space) - end - return block_gids -end - # BlockSparseMatrixAssemblers const DistributedBlockSparseMatrixAssembler{NB,NV,SB,P} = @@ -426,9 +419,11 @@ function FESpaces.SparseMatrixAssembler( test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, par_strategy=SubAssembledRows()) where {NB,SB,P} - block_idx = CartesianIndices((NB,NB)) + block_idx = CartesianIndices((NB,NB)) + block_rows = blocks(test.gids) + block_cols = blocks(trial.gids) block_assemblers = map(block_idx) do idx - rows = test.block_gids[idx[1]]; cols = trial.block_gids[idx[2]] + rows = block_rows[idx[1]]; cols = block_cols[idx[2]] return SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) end diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl index 7d1941e3..2200bda3 100644 --- a/test/BlockPartitionedArraysTests.jl +++ b/test/BlockPartitionedArraysTests.jl @@ -87,3 +87,8 @@ copy!(x,v) LinearAlgebra.fillstored!(__m,1.0) +__v = BlockPVector{Float64,PVector{Vector{Float64}}}(undef,block_range) + +m[Block(1,1)] +m[Block(1),Block(1)] + diff --git a/test/MultiFieldTests.jl b/test/MultiFieldTests.jl index b5a4f534..cb53e322 100644 --- a/test/MultiFieldTests.jl +++ b/test/MultiFieldTests.jl @@ -2,11 +2,17 @@ module MultiFieldTests using Gridap using Gridap.FESpaces +using Gridap.MultiField using GridapDistributed using PartitionedArrays using Test -function main(distribute, parts) +function l2_error(u1,u2,dΩ) + eu = u1 - u2 + sqrt(sum(∫( eu⋅eu )dΩ)) +end + +function main(distribute, parts, mfs) ranks = distribute(LinearIndices((prod(parts),))) output = mkpath(joinpath(@__DIR__,"output")) @@ -16,6 +22,7 @@ function main(distribute, parts) Ω = Triangulation(model) k = 2 + dΩ = Measure(Ω,2*k) reffe_u = ReferenceFE(lagrangian,VectorValue{2,Float64},k) reffe_p = ReferenceFE(lagrangian,Float64,k-1,space=:P) @@ -29,34 +36,43 @@ function main(distribute, parts) U = TrialFESpace(V,u) P = TrialFESpace(Q,p) - VxQ = MultiFieldFESpace([V,Q]) - UxP = MultiFieldFESpace([U,P]) # This generates again the global numbering + VxQ = MultiFieldFESpace([V,Q];style=mfs) + UxP = MultiFieldFESpace([U,P];style=mfs) # This generates again the global numbering UxP = TrialFESpace([u,p],VxQ) # This reuses the one computed @test length(UxP) == 2 uh, ph = interpolate([u,p],UxP) - eu = u - uh - ep = p - ph - - dΩ = Measure(Ω,2*k) - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 a((u,p),(v,q)) = ∫( ∇(v)⊙∇(u) - q*(∇⋅u) - (∇⋅v)*p )*dΩ l((v,q)) = ∫( v⋅f - q*g )*dΩ op = AffineFEOperator(a,l,UxP,VxQ) - solver = LinearFESolver(BackslashSolver()) - uh, ph = solve(solver,op) - - eu = u - uh - ep = p - ph - - writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) - - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + if !isa(mfs,BlockMultiFieldStyle) # BlockMultiFieldStyle does not support BackslashSolver + solver = LinearFESolver(BackslashSolver()) + uh, ph = solve(solver,op) + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 + + writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) + end + + A = get_matrix(op) + xh = interpolate([u,p],UxP) + x = GridapDistributed.change_ghost(get_free_dof_values(xh),axes(A,2)) + uh1, ph1 = FESpaces.EvaluationFunction(UxP,x) + uh2, ph2 = FEFunction(UxP,x) + + @test l2_error(u,uh1,dΩ) < 1.0e-9 + @test l2_error(p,ph1,dΩ) < 1.0e-9 + 
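  # NOTE: uh2/ph2 repeat the same checks through FEFunction instead of
  # EvaluationFunction; both constructors re-partition x onto the space's
  # dof layout via change_ghost and must reproduce u and p to the same
  # tolerance.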
@test l2_error(u,uh2,dΩ) < 1.0e-9 + @test l2_error(p,ph2,dΩ) < 1.0e-9 +end +function main(distribute, parts) + main(distribute, parts, ConsecutiveMultiFieldStyle()) + main(distribute, parts, BlockMultiFieldStyle()) end end # module diff --git a/test/StokesHdivDGTests.jl b/test/StokesHdivDGTests.jl index f3f19c78..f9d77494 100644 --- a/test/StokesHdivDGTests.jl +++ b/test/StokesHdivDGTests.jl @@ -34,84 +34,84 @@ function stokes_solution_2D(μ::Real) end function main(distribute,parts) - ranks = distribute(LinearIndices((prod(parts),))) - - μ = 1.0 - sol = stokes_solution_2D(μ) - u_ref = sol.u - f_ref = sol.f - σ_ref = sol.σ - - D = 2 - n = 4 - domain = Tuple(repeat([0,1],D)) - partition = (n,n) - model = CartesianDiscreteModel(ranks,parts,domain,partition) - - labels = get_face_labeling(model) - add_tag_from_tags!(labels,"dirichlet",[5,6,7]) - add_tag_from_tags!(labels,"neumann",[8,]) - - ############################################################################################ - order = 1 - reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) - V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") - U = TrialFESpace(V,u_ref) - - reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) - Q = TestFESpace(model,reffeₚ,conformity=:L2) - P = TrialFESpace(Q) - - Y = MultiFieldFESpace([V, Q]) - X = MultiFieldFESpace([U, P]) - - qdegree = 2*order+1 - Ω = Triangulation(model) - dΩ = Measure(Ω,qdegree) - - Γ = BoundaryTriangulation(model) - dΓ = Measure(Γ,qdegree) - n_Γ = get_normal_vector(Γ) - - Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) - dΓ_D = Measure(Γ_D,qdegree) - n_Γ_D = get_normal_vector(Γ_D) - - Γ_N = BoundaryTriangulation(model;tags="neumann") - dΓ_N = Measure(Γ_N,qdegree) - n_Γ_N = get_normal_vector(Γ_N) - - Λ = SkeletonTriangulation(model) - dΛ = Measure(Λ,qdegree) - n_Λ = get_normal_vector(Λ) - - h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) - h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) - h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) - - β_U = 50.0 - Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - - ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - - ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - - ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - - ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D - rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N - - penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D - penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D - - a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) - l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) - - op = AffineFEOperator(a,l,X,Y) - xh = solve(op) - - uh, ph = xh - err_u = l2_error(Ω,uh,sol.u) - err_p = l2_error(Ω,ph,sol.p) - tol = 1.0e-12 - @test err_u < tol - @test err_p < tol + ranks = distribute(LinearIndices((prod(parts),))) + + μ = 1.0 + sol = stokes_solution_2D(μ) + u_ref = sol.u + f_ref = sol.f + σ_ref = sol.σ + + D = 2 + n = 4 + domain = Tuple(repeat([0,1],D)) + partition = (n,n) + model = CartesianDiscreteModel(ranks,parts,domain,partition) + + labels = get_face_labeling(model) + add_tag_from_tags!(labels,"dirichlet",[5,6,7]) + add_tag_from_tags!(labels,"neumann",[8,]) + + ############################################################################################ + order = 1 + reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) + V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") + U = TrialFESpace(V,u_ref) + + reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) + Q = TestFESpace(model,reffeₚ,conformity=:L2) + P = TrialFESpace(Q) + + Y 
= MultiFieldFESpace([V, Q]) + X = MultiFieldFESpace([U, P]) + + qdegree = 2*order+1 + Ω = Triangulation(model) + dΩ = Measure(Ω,qdegree) + + Γ = BoundaryTriangulation(model) + dΓ = Measure(Γ,qdegree) + n_Γ = get_normal_vector(Γ) + + Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) + dΓ_D = Measure(Γ_D,qdegree) + n_Γ_D = get_normal_vector(Γ_D) + + Γ_N = BoundaryTriangulation(model;tags="neumann") + dΓ_N = Measure(Γ_N,qdegree) + n_Γ_N = get_normal_vector(Γ_N) + + Λ = SkeletonTriangulation(model) + dΛ = Measure(Λ,qdegree) + n_Λ = get_normal_vector(Λ) + + h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) + h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) + h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) + + β_U = 50.0 + Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - + ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - + ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - + ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - + ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D + rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N + + penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D + penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D + + a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) + l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) + + op = AffineFEOperator(a,l,X,Y) + xh = solve(op) + + uh, ph = xh + err_u = l2_error(Ω,uh,sol.u) + err_p = l2_error(Ω,ph,sol.p) + tol = 1.0e-12 + @test err_u < tol + @test err_p < tol end end # module From 480ff0a43dd803f9791f23d93134b00c1b1f396a Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 19:23:56 +1000 Subject: [PATCH 39/56] Small cleanup --- src/FESpaces.jl | 2 +- src/MultiField.jl | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 2a2a89c7..1ae1110b 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -125,7 +125,7 @@ function fetch_vector_ghost_values!(vector_partition,cache) assemble!((a,b)->b, vector_partition, cache) end -function change_ghost(a::PVector,f::DistributedFESpace) +function change_ghost(a,f::DistributedFESpace) change_ghost(a,f.gids) end diff --git a/src/MultiField.jl b/src/MultiField.jl index 66e60d37..5b65c765 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -71,18 +71,6 @@ function MultiField.restrict_to_field( PVector(values,partition(gids)) end -function change_ghost(x::BlockPVector, - X::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}) where {NB,SB,P} - array = map(blocks(X.gids),blocks(x)) do gids, xi - change_ghost(xi,gids) - end - return BlockPVector(array,X.gids) -end - -#function FESpaces.zero_free_values(f::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}) -# return mortar(map(zero_free_values,f.field_fe_space)) -#end - function FESpaces.FEFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) From c8c6d2a395ae07cfbd5e86b03aedbdb5d4cfd1d5 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Tue, 29 Aug 2023 19:24:58 +1000 Subject: [PATCH 40/56] More cleanup --- src/FESpaces.jl | 4 ---- test/BlockSparseMatrixAssemblersTests.jl | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 1ae1110b..23c18d45 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -125,10 +125,6 @@ function fetch_vector_ghost_values!(vector_partition,cache) assemble!((a,b)->b, vector_partition, cache) end -function change_ghost(a,f::DistributedFESpace) - 
change_ghost(a,f.gids)
-end
-
 function generate_gids(
   cell_range::PRange,
   cell_to_ldofs::AbstractArray{<:AbstractArray},
diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl
index 4352af83..aa99ce13 100644
--- a/test/BlockSparseMatrixAssemblersTests.jl
+++ b/test/BlockSparseMatrixAssemblersTests.jl
@@ -11,7 +11,7 @@ using GridapDistributed: BlockPVector, BlockPMatrix
 
 function is_same_vector(x::BlockPVector,y::PVector,Ub,U)
   y_fespace = GridapDistributed.change_ghost(y,U.gids)
-  x_fespace = GridapDistributed.change_ghost(x,Ub)
+  x_fespace = GridapDistributed.change_ghost(x,Ub.gids)
 
   res = map(1:num_fields(Ub)) do i
     xi = restrict_to_field(Ub,x_fespace,i)

From 6afb3f4741ee587209e5a29f7b7c0ed1eeda0ce8 Mon Sep 17 00:00:00 2001
From: JordiManyer 
Date: Wed, 30 Aug 2023 12:06:05 +1000
Subject: [PATCH 41/56] Expand block assembly tests

---
 test/BlockSparseMatrixAssemblersTests.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl
index aa99ce13..9b92f185 100644
--- a/test/BlockSparseMatrixAssemblersTests.jl
+++ b/test/BlockSparseMatrixAssemblersTests.jl
@@ -69,9 +69,18 @@ function _main(n_spaces,mfs,weakform,Ω,dΩ,U,V)
   @test is_same_vector(b1_blocks,b1,Yb,Y)
   @test is_same_matrix(A1_blocks,A1,Xb,X)
 
+  assemble_matrix!(A1_blocks,assem_blocks,bmatdata);
+  assemble_vector!(b1_blocks,assem_blocks,bvecdata);
+  @test is_same_vector(b1_blocks,b1,Yb,Y)
+  @test is_same_matrix(A1_blocks,A1,Xb,X)
+
   A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata)
   @test is_same_vector(b2_blocks,b2,Yb,Y)
   @test is_same_matrix(A2_blocks,A2,Xb,X)
+
+  assemble_matrix_and_vector!(A2_blocks,b2_blocks,assem_blocks,bdata)
+  @test is_same_vector(b2_blocks,b2,Yb,Y)
+  @test is_same_matrix(A2_blocks,A2,Xb,X)
 
   op = AffineFEOperator(biform,liform,X,Y)
   block_op = AffineFEOperator(biform,liform,Xb,Yb)

From 3d1d5edb8c95f541673f6ac27d610408388b02ee Mon Sep 17 00:00:00 2001
From: JordiManyer 
Date: Wed, 30 Aug 2023 12:10:09 +1000
Subject: [PATCH 42/56] Added DistributedAssemblyStrategy

---
 src/Algebra.jl                | 94 ++++++++++++++++++++++++-----------
 src/BlockPartitionedArrays.jl |  2 +-
 src/FESpaces.jl               |  6 +--
 3 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/src/Algebra.jl b/src/Algebra.jl
index a405964d..1c2132d5 100644
--- a/src/Algebra.jl
+++ b/src/Algebra.jl
@@ -263,25 +263,25 @@ end
 function local_views(row_col_partitioned_matrix::PSparseMatrix,
                      test_dofs_partition::PRange,
                      trial_dofs_partition::PRange)
-  if (row_col_partitioned_matrix.row_partition === partition(test_dofs_partition) ||
-    row_col_partitioned_matrix.col_partition === partition(trial_dofs_partition) )
-    @assert false
-  else
-    map(
-      partition(row_col_partitioned_matrix),
-      partition(test_dofs_partition),
-      partition(trial_dofs_partition),
-      row_col_partitioned_matrix.row_partition,
-      row_col_partitioned_matrix.col_partition) do matrix_partition,
-        test_dof_partition,
-        trial_dof_partition,
-        row_partition,
-        col_partition
-      rl2lmap = find_local_to_local_map(test_dof_partition,row_partition)
-      cl2lmap = find_local_to_local_map(trial_dof_partition,col_partition)
-      LocalView(matrix_partition,(rl2lmap,cl2lmap))
-    end
+  if (row_col_partitioned_matrix.row_partition === partition(test_dofs_partition) ||
+      row_col_partitioned_matrix.col_partition === partition(trial_dofs_partition) )
+    @assert false
+  else
+    map(
+      partition(row_col_partitioned_matrix),
+      partition(test_dofs_partition),
+      partition(trial_dofs_partition),
+
row_col_partitioned_matrix.row_partition, + row_col_partitioned_matrix.col_partition) do matrix_partition, + test_dof_partition, + trial_dof_partition, + row_partition, + col_partition + rl2lmap = find_local_to_local_map(test_dof_partition,row_partition) + cl2lmap = find_local_to_local_map(trial_dof_partition,col_partition) + LocalView(matrix_partition,(rl2lmap,cl2lmap)) end + end end function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} @@ -294,8 +294,33 @@ end # PSparseMatrix assembly -struct FullyAssembledRows end -struct SubAssembledRows end +""" + ParallelAssemblyStrategy(ghosted_rows::Bool,optimize_ghosts::Bool) + + Two main strategies are available for parallel assembly: + - FullyAssembledRows: the rows of the matrix are assembled only in the process owning the row. + - SubAssembledRows: processors also hold ghost rows. + + Options: + - optimize_ghosts: If `false`, the FESpace PRanges are used for the linear system. + If `true`, ghost ids are reduced to minimize communications. +""" +struct ParallelAssemblyStrategy{GR,OG} + ghosted_rows::Bool + optimize_ghosts::Bool + function ParallelAssemblyStrategy(ghosted_rows::Bool,optimize_ghosts::Bool) + new{ghosted_rows,optimize_ghosts}(ghosted_rows,optimize_ghosts) + end +end + +const FullyAssembledRows{OG} = ParallelAssemblyStrategy{false,OG} +FullyAssembledRows(;optimize_ghosts=true) = ParallelAssemblyStrategy(false,optimize_ghosts) + +const SubAssembledRows{OG} = ParallelAssemblyStrategy{true,OG} +SubAssembledRows(;optimize_ghosts=true) = ParallelAssemblyStrategy(true,optimize_ghosts) + +optimize_ghosts(::Type{ParallelAssemblyStrategy{GR,OG}}) where {GR,OG} = OG +optimize_ghosts(a::ParallelAssemblyStrategy) = optimize_ghosts(typeof(a)) # For the moment we use COO format even though # it is quite memory consuming. @@ -427,6 +452,11 @@ get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_test_gids,diag(a.array)) get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_trial_gids,diag(a.array)) +ParallelAssemblyStrategy(a::DistributedAllocationCOO) = a.par_strategy +function ParallelAssemblyStrategy(a::ArrayBlock{<:DistributedAllocationCOO}) + return ParallelAssemblyStrategy(first(a.array)) +end + function Algebra.create_from_nz(a::PSparseMatrix) # For FullyAssembledRows the underlying Exchanger should # not have ghost layer making assemble! 
do nothing (TODO check) @@ -436,17 +466,19 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows}) f(x) = nothing - A, = _fa_create_from_nz_with_callback(f,a) + s = ParallelAssemblyStrategy(a) + A, = _fa_create_from_nz_with_callback(f,a,optimize_ghosts(s)) return A end function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) f(x) = nothing - A, = _fa_create_from_nz_with_callback(f,a) + s = ParallelAssemblyStrategy(a) + A, = _fa_create_from_nz_with_callback(f,a,optimize_ghosts(s)) return A end -function _fa_create_from_nz_with_callback(callback,a) +function _fa_create_from_nz_with_callback(callback,a,optimize_ghosts=true) # Recover some data I,J,V = get_allocations(a) @@ -480,17 +512,19 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) f(x) = nothing - A, = _sa_create_from_nz_with_callback(f,f,a) + s = ParallelAssemblyStrategy(a) + A, = _sa_create_from_nz_with_callback(f,f,a,optimize_ghosts(s)) return A end function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) f(x) = nothing - A, = _sa_create_from_nz_with_callback(f,f,a) + s = ParallelAssemblyStrategy(a) + A, = _sa_create_from_nz_with_callback(f,f,a,optimize_ghosts(s)) return A end -function _sa_create_from_nz_with_callback(callback,async_callback,a) +function _sa_create_from_nz_with_callback(callback,async_callback,a,optimize_ghosts=true) # Recover some data I,J,V = get_allocations(a) test_dofs_gids_prange = get_test_gids(a) @@ -662,7 +696,8 @@ function Algebra.create_from_nz( _rhs_callback(c_fespace,rows) end - A,b = _fa_create_from_nz_with_callback(callback,a) + s = ParallelAssemblyStrategy(a) + A,b = _fa_create_from_nz_with_callback(callback,a,optimize_ghosts(s)) return A,b end @@ -685,7 +720,8 @@ function Algebra.create_from_nz( assemble!(b) end - A,b = _sa_create_from_nz_with_callback(callback,async_callback,a) + s = ParallelAssemblyStrategy(a) + A,b = _sa_create_from_nz_with_callback(callback,async_callback,a,optimize_ghosts(s)) return A,b end @@ -747,7 +783,7 @@ function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) touched = map(values) do values fill!(Vector{Bool}(undef,length(values)),false) end - allocations=map(values,touched) do values,touched + allocations = map(values,touched) do values,touched ArrayAllocationTrackTouchedAndValues(touched,values) end return PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 29e09599..39bebc7f 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -242,7 +242,7 @@ end function LinearAlgebra.fillstored!(a::BlockPMatrix,v) map(blocks(a)) do a - fillstored!(a,v) + LinearAlgebra.fillstored!(a,v) end return a end diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 23c18d45..f465eaaa 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -627,12 +627,12 @@ end # When using this one, make sure that you also loop over ghost cells. # This is at your own risk. 
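For orientation, here is a minimal, hedged usage sketch of the two assembly strategies involved in this patch. It assumes the convenience `SparseMatrixAssembler(trial,test,par_strategy)` constructor used elsewhere in this series, with `X`, `Y`, and `matdata` as in the block-assembly tests above; it is an illustration, not part of the patch itself.

```julia
# Sketch only (assumed constructor signature): choosing a parallel assembly strategy.
# With FullyAssembledRows(), each process stores only the matrix rows it owns, so the
# local integration loops must also visit ghost cells (hence the warning above).
# With SubAssembledRows(), ghost rows are kept locally and assembled across processes.
assem_fa = SparseMatrixAssembler(X, Y, FullyAssembledRows())
assem_sa = SparseMatrixAssembler(X, Y, SubAssembledRows())
A = assemble_matrix(assem_sa, matdata)  # a PSparseMatrix, rows distributed per strategy
```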
-function local_assembly_strategy(::FullyAssembledRows,test_space_indices,trial_space_indices) - test_space_local_to_ghost = local_to_ghost(test_space_indices) +function local_assembly_strategy(::FullyAssembledRows,rows,cols) + rows_local_to_ghost = local_to_ghost(rows) GenericAssemblyStrategy( identity, identity, - row->test_space_local_to_ghost[row]==0, + row->rows_local_to_ghost[row]==0, col->true) end From 203bebbd805cad5372ccf7699b482cab1d3d7cb6 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Wed, 30 Aug 2023 15:39:35 +1000 Subject: [PATCH 43/56] Implemented missing methods for BlockPArrays --- src/BlockPartitionedArrays.jl | 61 +++++++++++++++++++++++++++-- test/BlockPartitionedArraysTests.jl | 25 +++++++++--- 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 39bebc7f..0b0cb307 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -162,6 +162,34 @@ function Base.fill!(a::BlockPVector,v) return a end +function Base.sum(a::BlockPArray) + return sum(map(sum,blocks(a))) +end + +Base.maximum(x::BlockPArray) = maximum(identity,x) +function Base.maximum(f::Function,x::BlockPArray) + maximum(map(xi->maximum(f,xi),blocks(x))) +end + +Base.minimum(x::BlockPArray) = minimum(identity,x) +function Base.minimum(f::Function,x::BlockPArray) + minimum(map(xi->minimum(f,xi),blocks(x))) +end + +function Base.:(==)(a::BlockPVector,b::BlockPVector) + A = length(a) == length(b) + B = all(map((ai,bi)->ai==bi,blocks(a),blocks(b))) + return A && B +end + +function Base.any(f::Function,x::BlockPVector) + any(map(xi->any(f,xi),blocks(x))) +end + +function Base.all(f::Function,x::PVector) + all(map(xi->all(f,xi),blocks(x))) +end + # AbstractBlockArray API BlockArrays.blocks(a::BlockPArray) = a.blocks @@ -214,10 +242,35 @@ function PartitionedArrays.partition(a::BlockPArray) end function PartitionedArrays.to_trivial_partition(a::BlockPArray) - vals = map(to_trivial_partition,blocks(a)) + vals = map(PartitionedArrays.to_trivial_partition,blocks(a)) return mortar(vals) end +function PartitionedArrays.local_values(a::BlockPArray) + vals = map(local_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_values(a::BlockPArray) + vals = map(own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_values(a::BlockPArray) + vals = map(ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_ghost_values(a::BlockPMatrix) + vals = map(own_ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_own_values(a::BlockPMatrix) + vals = map(ghost_own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + # LinearAlgebra API function LinearAlgebra.mul!(y::BlockPVector,A::BlockPMatrix,x::BlockPVector) @@ -235,9 +288,9 @@ function LinearAlgebra.dot(x::BlockPVector,y::BlockPVector) return sum(map(dot,blocks(x),blocks(y))) end -function LinearAlgebra.norm(v::BlockPVector) - block_norms = map(norm,blocks(v)) - return sqrt(sum(block_norms.^2)) +function LinearAlgebra.norm(v::BlockPVector,p::Real=2) + block_norms = map(vi->norm(vi,p),blocks(v)) + return sum(block_norms.^p)^(1/p) end function LinearAlgebra.fillstored!(a::BlockPMatrix,v) diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl index 2200bda3..542e1fe1 100644 --- a/test/BlockPartitionedArraysTests.jl 
+++ b/test/BlockPartitionedArraysTests.jl @@ -58,11 +58,11 @@ mul!(x,_m[1,1],_v) @test blocklength(block_range) == 2 @test blocksize(block_range) == (2,) -# BlockPArray +# AbstractArray API __v = similar(v,block_range) __m = similar(m,(block_range,block_range)) -fill!(v,1.0) +fill!(__v,1.0) __v = __v .+ 1.0 __v = __v .- 1.0 @@ -74,7 +74,22 @@ __m = __m .- 1.0 __m = __m .* 1.0 __m = __m ./ 1.0 -# LinearAlgebra +# PartitionedArrays API + +consistent!(__v) |> wait +t = assemble!(__v) +assemble!(__m) |> wait +fetch(t); + +PartitionedArrays.to_trivial_partition(m) + +local_values(v) +own_values(v) +ghost_values(v) +own_ghost_values(m) +ghost_own_values(m) + +# LinearAlgebra API x = similar(v) mul!(x,m,v) @@ -89,6 +104,6 @@ LinearAlgebra.fillstored!(__m,1.0) __v = BlockPVector{Float64,PVector{Vector{Float64}}}(undef,block_range) -m[Block(1,1)] -m[Block(1),Block(1)] +maximum(abs,v) +minimum(abs,v) From 79b13ebf7ff17f8b798b16fb6d67defc71b1e990 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Wed, 30 Aug 2023 15:53:25 +1000 Subject: [PATCH 44/56] Small bugfix --- src/BlockPartitionedArrays.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 0b0cb307..271457db 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -186,7 +186,7 @@ function Base.any(f::Function,x::BlockPVector) any(map(xi->any(f,xi),blocks(x))) end -function Base.all(f::Function,x::PVector) +function Base.all(f::Function,x::BlockPVector) all(map(xi->all(f,xi),blocks(x))) end From 160cfb8809c90e92f2205c674b5866cb5b407471 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Wed, 30 Aug 2023 17:59:47 +1000 Subject: [PATCH 45/56] Add rmul! --- src/BlockPartitionedArrays.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index 271457db..cc30304a 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -190,6 +190,11 @@ function Base.all(f::Function,x::BlockPVector) all(map(xi->all(f,xi),blocks(x))) end +function LinearAlgebra.rmul!(a::BlockPVector,v::Number) + map(ai->rmul!(ai,v),blocks(a)) + return a +end + # AbstractBlockArray API BlockArrays.blocks(a::BlockPArray) = a.blocks From 3faace9ddbd45111bf45e799031b7acb98872370 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Thu, 31 Aug 2023 14:22:52 +1000 Subject: [PATCH 46/56] Revert ParallelAssemblyStrategy --- src/Algebra.jl | 56 +++++++++----------------------------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 1c2132d5..7255e8d5 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -294,33 +294,8 @@ end # PSparseMatrix assembly -""" - ParallelAssemblyStrategy(ghosted_rows::Bool,optimize_ghosts::Bool) - - Two main strategies are available for parallel assembly: - - FullyAssembledRows: the rows of the matrix are assembled only in the process owning the row. - - SubAssembledRows: processors also hold ghost rows. - - Options: - - optimize_ghosts: If `false`, the FESpace PRanges are used for the linear system. - If `true`, ghost ids are reduced to minimize communications. 
-""" -struct ParallelAssemblyStrategy{GR,OG} - ghosted_rows::Bool - optimize_ghosts::Bool - function ParallelAssemblyStrategy(ghosted_rows::Bool,optimize_ghosts::Bool) - new{ghosted_rows,optimize_ghosts}(ghosted_rows,optimize_ghosts) - end -end - -const FullyAssembledRows{OG} = ParallelAssemblyStrategy{false,OG} -FullyAssembledRows(;optimize_ghosts=true) = ParallelAssemblyStrategy(false,optimize_ghosts) - -const SubAssembledRows{OG} = ParallelAssemblyStrategy{true,OG} -SubAssembledRows(;optimize_ghosts=true) = ParallelAssemblyStrategy(true,optimize_ghosts) - -optimize_ghosts(::Type{ParallelAssemblyStrategy{GR,OG}}) where {GR,OG} = OG -optimize_ghosts(a::ParallelAssemblyStrategy) = optimize_ghosts(typeof(a)) +struct FullyAssembledRows end +struct SubAssembledRows end # For the moment we use COO format even though # it is quite memory consuming. @@ -452,11 +427,6 @@ get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_test_gids,diag(a.array)) get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_trial_gids,diag(a.array)) -ParallelAssemblyStrategy(a::DistributedAllocationCOO) = a.par_strategy -function ParallelAssemblyStrategy(a::ArrayBlock{<:DistributedAllocationCOO}) - return ParallelAssemblyStrategy(first(a.array)) -end - function Algebra.create_from_nz(a::PSparseMatrix) # For FullyAssembledRows the underlying Exchanger should # not have ghost layer making assemble! do nothing (TODO check) @@ -466,19 +436,17 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows}) f(x) = nothing - s = ParallelAssemblyStrategy(a) - A, = _fa_create_from_nz_with_callback(f,a,optimize_ghosts(s)) + A, = _fa_create_from_nz_with_callback(f,a) return A end function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) f(x) = nothing - s = ParallelAssemblyStrategy(a) - A, = _fa_create_from_nz_with_callback(f,a,optimize_ghosts(s)) + A, = _fa_create_from_nz_with_callback(f,a) return A end -function _fa_create_from_nz_with_callback(callback,a,optimize_ghosts=true) +function _fa_create_from_nz_with_callback(callback,a) # Recover some data I,J,V = get_allocations(a) @@ -512,19 +480,17 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) f(x) = nothing - s = ParallelAssemblyStrategy(a) - A, = _sa_create_from_nz_with_callback(f,f,a,optimize_ghosts(s)) + A, = _sa_create_from_nz_with_callback(f,f,a) return A end function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) f(x) = nothing - s = ParallelAssemblyStrategy(a) - A, = _sa_create_from_nz_with_callback(f,f,a,optimize_ghosts(s)) + A, = _sa_create_from_nz_with_callback(f,f,a) return A end -function _sa_create_from_nz_with_callback(callback,async_callback,a,optimize_ghosts=true) +function _sa_create_from_nz_with_callback(callback,async_callback,a) # Recover some data I,J,V = get_allocations(a) test_dofs_gids_prange = get_test_gids(a) @@ -696,8 +662,7 @@ function Algebra.create_from_nz( _rhs_callback(c_fespace,rows) end - s = ParallelAssemblyStrategy(a) - A,b = _fa_create_from_nz_with_callback(callback,a,optimize_ghosts(s)) + A,b = _fa_create_from_nz_with_callback(callback,a) return A,b end @@ -720,8 +685,7 @@ function Algebra.create_from_nz( assemble!(b) end - s = ParallelAssemblyStrategy(a) - A,b = _sa_create_from_nz_with_callback(callback,async_callback,a,optimize_ghosts(s)) + A,b = 
_sa_create_from_nz_with_callback(callback,async_callback,a) return A,b end From dceb81961c424455467e8f5041697d828865d59a Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Thu, 31 Aug 2023 14:28:28 +1000 Subject: [PATCH 47/56] own_and_ghost renamed to split_own_and_ghost --- src/FESpaces.jl | 12 ++++++------ src/MultiField.jl | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index f465eaaa..21aedf42 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -462,16 +462,16 @@ end # Factories -function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;own_and_ghost=false,kwargs...) +function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;split_own_and_ghost=false,kwargs...) spaces = map(local_views(model)) do m FESpace(m,reffe;kwargs...) end gids = generate_gids(model,spaces) - vector_type = _find_vector_type(spaces,gids;own_and_ghost=own_and_ghost) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;own_and_ghost=false,kwargs...) +function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;split_own_and_ghost=false,kwargs...) trian = add_ghost_cells(_trian) trian_gids = generate_cell_gids(trian) spaces = map(trian.trians) do t @@ -480,15 +480,15 @@ function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;own_and_ghost=f cell_to_ldofs = map(get_cell_dof_ids,spaces) nldofs = map(num_free_dofs,spaces) gids = generate_gids(trian_gids,cell_to_ldofs,nldofs) - vector_type = _find_vector_type(spaces,gids;own_and_ghost=own_and_ghost) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function _find_vector_type(spaces,gids;own_and_ghost=false) +function _find_vector_type(spaces,gids;split_own_and_ghost=false) local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) Tv = eltype(local_vector_type) T = Vector{Tv} - if own_and_ghost + if split_own_and_ghost T = OwnAndGhostVectors{T} end if isa(gids,PRange) diff --git a/src/MultiField.jl b/src/MultiField.jl index 5b65c765..ed33e659 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -236,7 +236,7 @@ end # Factory function MultiField.MultiFieldFESpace( - f_dspace::Vector{<:DistributedSingleFieldFESpace};own_and_ghost=false, kwargs...) + f_dspace::Vector{<:DistributedSingleFieldFESpace};split_own_and_ghost=false, kwargs...) f_p_space = map(local_views,f_dspace) v(x...) 
= collect(x) @@ -244,7 +244,7 @@ function MultiField.MultiFieldFESpace( p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) style = PartitionedArrays.getany(map(MultiFieldStyle,p_mspace)) gids = generate_multi_field_gids(style,f_dspace,p_mspace) - vector_type = _find_vector_type(p_mspace,gids;own_and_ghost=own_and_ghost) + vector_type = _find_vector_type(p_mspace,gids;split_own_and_ghost=split_own_and_ghost) DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) end From a9e86bdb52d1870a29f33000972bd77bcc726818 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Thu, 31 Aug 2023 14:50:40 +1000 Subject: [PATCH 48/56] Expanded tests for OwnAndGhostVectors --- test/FESpacesTests.jl | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/test/FESpacesTests.jl b/test/FESpacesTests.jl index 7b0860ab..bf1e78e6 100644 --- a/test/FESpacesTests.jl +++ b/test/FESpacesTests.jl @@ -91,11 +91,11 @@ function main(distribute,parts,das) V = TestFESpace(model,reffe,dirichlet_tags="boundary") U = TrialFESpace(u,V) V2 = FESpace(Ω,reffe) - @test get_vector_type(V) <: PVector - @test get_vector_type(U) <: PVector - @test get_vector_type(V2) <: PVector + @test get_vector_type(V) <: PVector{<:Vector} + @test get_vector_type(U) <: PVector{<:Vector} + @test get_vector_type(V2) <: PVector{<:Vector} - free_values_partition=map(partition(V.gids)) do indices + free_values_partition = map(partition(V.gids)) do indices ones(Float64,local_length(indices)) end @@ -163,6 +163,23 @@ function main(distribute,parts,das) cont = ∫( abs2(u0h) )dΩ @test sqrt(sum(cont)) < 1.0e-14 + # OwnAndGhostVector partitions + V3 = FESpace(model,reffe,dirichlet_tags="boundary",split_own_and_ghost=true) + U3 = TrialFESpace(u,V3) + @test get_vector_type(V3) <: PVector{<:OwnAndGhostVectors} + + free_values = zero_free_values(U3) + dirichlet_values = get_dirichlet_dof_values(U3) + uh = interpolate_everywhere(u,U3) + _uh = interpolate_everywhere(uh,U3) + __uh = interpolate_everywhere!(_uh,free_values,dirichlet_values,U3) + + uh = interpolate(u,U3) + dofs = get_fe_dof_basis(U3) + cell_vals = dofs(uh) + gather_free_values!(free_values,U3,cell_vals) + gather_free_and_dirichlet_values!(free_values,dirichlet_values,U3,cell_vals) + uh = FEFunction(U3,free_values,dirichlet_values) # I need to use the square [0,2]² in the sequel so that # when integrating over the interior facets, the entries From 67203fb5425e1173c8a195e26cd4131602959719 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Thu, 31 Aug 2023 15:11:11 +1000 Subject: [PATCH 49/56] Updated NEWS --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index c6c1f5a6..ea3b0e22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,10 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added support for distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124). +- Add possibility to use `OwnAndGhostVector` as vector partition for `FESpace` dofs. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124). +- Implement `BlockPArray <: AbstractBlockArray`, a new type that behaves as a `BlockArray{PArray}` and which fulfills the APIs of both `PArray` and `AbstractBlockArray`. This new type will be used to implement distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124). +- `DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}` now has a `BlockPRange` as gids and `BlockPVector` as vector type. 
This is necessary to create consistency between fespace and system vectors, which in turn avoids memory allocations/copies when transferring between FESpace and linear system layouts. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 
 ### Changed
 
 - Updated compat for FillArrays to v1. Since PR[127](https://github.com/gridap/GridapDistributed.jl/pull/127).
+- Merged functionalities of `consistent_local_views` and `change_ghost`. `consistent_local_views` has been removed. `change_ghost` now has two kwargs `is_consistent` and `make_consistent` that take into consideration all possible use cases. `change_ghost` has also been optimized to avoid unnecessary allocations if possible. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 
 ## [0.3.0] - 2023-08-16

From 8c4b95373e50a569dcea61dfcfee7bee77b9c05a Mon Sep 17 00:00:00 2001
From: JordiManyer 
Date: Fri, 8 Sep 2023 11:37:30 +1000
Subject: [PATCH 50/56] Added local_views for BlockPArrays

---
 src/Algebra.jl                      | 78 +++++++++++++++--------------
 src/BlockPartitionedArrays.jl       | 18 ++++---
 test/BlockPartitionedArraysTests.jl | 27 +++++-----
 3 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/src/Algebra.jl b/src/Algebra.jl
index 7255e8d5..9135d5ef 100644
--- a/src/Algebra.jl
+++ b/src/Algebra.jl
@@ -143,20 +143,22 @@ function local_views(a::AbstractMatrix,rows,cols)
   @notimplemented
 end
 
-function local_views(a::AbstractArray)
-  a
-end
+local_views(a::AbstractArray) = a
+local_views(a::PRange) = partition(a)
+local_views(a::PVector) = partition(a)
+local_views(a::PSparseMatrix) = partition(a)
 
-function local_views(a::PRange)
-  partition(a)
+function local_views(a::BlockPRange)
+  map(blocks(a)) do a
+    local_views(a)
+  end |> to_parray_of_arrays
 end
 
-function local_views(a::PVector)
-  partition(a)
-end
-
-function local_views(a::PSparseMatrix)
-  partition(a)
+function local_views(a::BlockPArray)
+  vals = map(blocks(a)) do a
+    local_views(a)
+  end |> to_parray_of_arrays
+  return map(mortar,vals)
 end
 
 # change_ghost
@@ -248,42 +250,44 @@ function _lid_to_plid(lid,lid_to_plid)
   plid
 end
 
-function local_views(row_partitioned_vector::PVector,test_dofs_partition::PRange)
-  if row_partitioned_vector.index_partition === partition(test_dofs_partition)
-    @assert false
+function local_views(a::PVector,new_rows::PRange)
+  old_rows = axes(a,1)
+  if partition(old_rows) === partition(new_rows)
+    partition(a)
   else
-    map(partition(row_partitioned_vector),
-        partition(test_dofs_partition),
-        row_partitioned_vector.index_partition) do vector_partition,dofs_partition,row_partition
-      LocalView(vector_partition,(find_local_to_local_map(dofs_partition,row_partition),))
+    map(partition(a),partition(old_rows),partition(new_rows)) do vector_partition,old_rows,new_rows
+      LocalView(vector_partition,(find_local_to_local_map(new_rows,old_rows),))
     end
   end
 end
 
-function local_views(row_col_partitioned_matrix::PSparseMatrix,
-                     test_dofs_partition::PRange,
-                     trial_dofs_partition::PRange)
-  if (row_col_partitioned_matrix.row_partition === partition(test_dofs_partition) ||
-      row_col_partitioned_matrix.col_partition === partition(trial_dofs_partition) )
-    @assert false
-  else
-    map(
-      partition(row_col_partitioned_matrix),
-      partition(test_dofs_partition),
-      partition(trial_dofs_partition),
-      row_col_partitioned_matrix.row_partition,
-      row_col_partitioned_matrix.col_partition) do matrix_partition,
-        test_dof_partition,
-        trial_dof_partition,
-        row_partition,
-        col_partition
-      rl2lmap =
find_local_to_local_map(test_dof_partition,row_partition) - cl2lmap = find_local_to_local_map(trial_dof_partition,col_partition) +function local_views(a::PSparseMatrix,new_rows::PRange,new_cols::PRange) + old_rows, old_cols = axes(a) + if (partition(old_rows) === partition(new_rows) && partition(old_cols) === partition(new_cols) ) + partition(a) + else + map(partition(a), + partition(old_rows),partition(old_cols), + partition(new_rows),partition(new_cols)) do matrix_partition,old_rows,old_cols,new_rows,new_cols + rl2lmap = find_local_to_local_map(new_rows,old_rows) + cl2lmap = find_local_to_local_map(new_cols,old_cols) LocalView(matrix_partition,(rl2lmap,cl2lmap)) end end end +function local_views(a::BlockPVector,new_rows::BlockPRange) + vals = map(local_views,blocks(a),blocks(new_rows)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function local_views(a::BlockPMatrix,new_rows::BlockPRange,new_cols::BlockPRange) + vals = map(CartesianIndices(blocksize(a))) do I + local_views(a[Block(I)],new_rows[Block(I[1])],new_cols[Block(I[2])]) + end |> to_parray_of_arrays + return map(mortar,vals) +end + function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} PVector{V}(undef,partition(ids)) end diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index cc30304a..e6f65215 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -20,6 +20,10 @@ function PartitionedArrays.partition(a::BlockPRange) return map(partition,blocks(a)) |> to_parray_of_arrays end +function Base.getindex(a::BlockPRange,inds::Block{1}) + a.ranges[inds.n...] +end + """ """ struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} @@ -65,7 +69,7 @@ function BlockPVector{V}(::UndefInitializer,rows::BlockPRange) where {V} end function BlockPMatrix{V}(::UndefInitializer,rows::BlockPRange,cols::BlockPRange) where {V} - block_ids = CartesianIndices((blocklength(rows),blocklength(cols))) + block_ids = CartesianIndices((blocklength(rows),blocklength(cols))) block_rows = blocks(rows) block_cols = blocks(cols) vals = map(block_ids) do I @@ -315,7 +319,7 @@ end BlockArrays.blocks(b::BlockPBroadcasted) = b.blocks BlockArrays.blockaxes(b::BlockPBroadcasted) = b.axes -function Base.broadcasted(f, args::Union{BlockPVector,BlockPBroadcasted}...) +function Base.broadcasted(f, args::Union{BlockPArray,BlockPBroadcasted}...) a1 = first(args) @boundscheck @assert all(ai -> blockaxes(ai) == blockaxes(a1),args) @@ -325,18 +329,18 @@ function Base.broadcasted(f, args::Union{BlockPVector,BlockPBroadcasted}...) 
return BlockPBroadcasted(blocks_out,blockaxes(a1)) end -function Base.broadcasted(f, a::Number, b::Union{BlockPVector,BlockPBroadcasted}) +function Base.broadcasted(f, a::Number, b::Union{BlockPArray,BlockPBroadcasted}) blocks_out = map(b->Base.broadcasted(f,a,b),blocks(b)) return BlockPBroadcasted(blocks_out,blockaxes(b)) end -function Base.broadcasted(f, a::Union{BlockPVector,BlockPBroadcasted}, b::Number) +function Base.broadcasted(f, a::Union{BlockPArray,BlockPBroadcasted}, b::Number) blocks_out = map(a->Base.broadcasted(f,a,b),blocks(a)) return BlockPBroadcasted(blocks_out,blockaxes(a)) end function Base.broadcasted(f, - a::Union{BlockPVector,BlockPBroadcasted}, + a::Union{BlockPArray,BlockPBroadcasted}, b::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}) Base.broadcasted(f,a,Base.materialize(b)) end @@ -344,7 +348,7 @@ end function Base.broadcasted( f, a::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}, - b::Union{BlockPVector,BlockPBroadcasted}) + b::Union{BlockPArray,BlockPBroadcasted}) Base.broadcasted(f,Base.materialize(a),b) end @@ -353,7 +357,7 @@ function Base.materialize(b::BlockPBroadcasted) return mortar(blocks_out) end -function Base.materialize!(a::BlockPVector,b::BlockPBroadcasted) +function Base.materialize!(a::BlockPArray,b::BlockPBroadcasted) map(Base.materialize!,blocks(a),blocks(b)) return a end diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl index 542e1fe1..3e810085 100644 --- a/test/BlockPartitionedArraysTests.jl +++ b/test/BlockPartitionedArraysTests.jl @@ -9,7 +9,6 @@ using LinearAlgebra using GridapDistributed: BlockPArray, BlockPVector, BlockPMatrix, BlockPRange - ranks = with_debug() do distribute distribute(LinearIndices((2,))) end @@ -33,6 +32,7 @@ block_range = BlockPRange([PRange(indices),PRange(indices)]) _v = PVector{OwnAndGhostVectors{Vector{Float64}}}(undef,indices) v = BlockPArray([_v,_v],(block_range,)) +fill!(v,1.0) _m = map(CartesianIndices((2,2))) do I i,j = I[1],I[2] @@ -47,7 +47,7 @@ _m = map(CartesianIndices((2,2))) do I end end PSparseMatrix(local_mats,indices,indices) -end +end; m = BlockPArray(_m,(block_range,block_range)) x = similar(_v) @@ -69,11 +69,6 @@ __v = __v .- 1.0 __v = __v .* 1.0 __v = __v ./ 1.0 -__m = __m .+ 1.0 -__m = __m .- 1.0 -__m = __m .* 1.0 -__m = __m ./ 1.0 - # PartitionedArrays API consistent!(__v) |> wait @@ -83,6 +78,8 @@ fetch(t); PartitionedArrays.to_trivial_partition(m) +partition(v) +partition(m) local_values(v) own_values(v) ghost_values(v) @@ -90,20 +87,26 @@ own_ghost_values(m) ghost_own_values(m) # LinearAlgebra API - +fill!(v,1.0) x = similar(v) mul!(x,m,v) -consistent!(x) |> fetch -partition(x) +consistent!(x) |> wait -dot(v,x) +@test dot(v,x) ≈ 36 norm(v) copy!(x,v) LinearAlgebra.fillstored!(__m,1.0) -__v = BlockPVector{Float64,PVector{Vector{Float64}}}(undef,block_range) +__v = BlockPVector{Vector{Float64}}(undef,block_range) +#__m = BlockPMatrix{SparseMatrixCSC{Float64,Int64}}(undef,block_range,block_range) maximum(abs,v) minimum(abs,v) +# GridapDistributed API +v_parts = local_views(v) +m_parts = local_views(m) + +v_parts = local_views(v,block_range) +m_parts = local_views(m,block_range,block_range) From eee16f92ec1247cdd3d074987e3eaa806d9f1109 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 2 Oct 2023 12:34:40 +1100 Subject: [PATCH 51/56] Minor --- src/Algebra.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Algebra.jl b/src/Algebra.jl index 9135d5ef..e8fedd02 100644 --- 
a/src/Algebra.jl +++ b/src/Algebra.jl @@ -1,3 +1,14 @@ + +# Vector allocation + +function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} + PVector{V}(undef,partition(ids)) +end + +function Algebra.allocate_vector(::Type{<:BlockPVector{V}},ids::BlockPRange) where {V} + BlockPVector{V}(undef,ids) +end + # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.ArrayCounter,axes) @notimplemented @@ -288,14 +299,6 @@ function local_views(a::BlockPMatrix,new_rows::BlockPRange,new_cols::BlockPRange return map(mortar,vals) end -function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} - PVector{V}(undef,partition(ids)) -end - -function Algebra.allocate_vector(::Type{<:BlockPVector{V}},ids::BlockPRange) where {V} - BlockPVector{V}(undef,ids) -end - # PSparseMatrix assembly struct FullyAssembledRows end From b11170d39ff8d29dc72cdb582a4e918cda049a5b Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 2 Oct 2023 14:17:28 +1100 Subject: [PATCH 52/56] Added Adaptivity.jl and BlockPartitionedArrays.jl to docs --- docs/src/Adaptivity.md | 6 ++++++ docs/src/MultiField.md | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 docs/src/Adaptivity.md diff --git a/docs/src/Adaptivity.md b/docs/src/Adaptivity.md new file mode 100644 index 00000000..56ef09eb --- /dev/null +++ b/docs/src/Adaptivity.md @@ -0,0 +1,6 @@ +# Adaptivity + +```@autodocs +Modules = [GridapDistributed] +Pages = ["Adaptivity.jl"] +``` diff --git a/docs/src/MultiField.md b/docs/src/MultiField.md index 4b7c2318..f1472582 100644 --- a/docs/src/MultiField.md +++ b/docs/src/MultiField.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["MultiField.jl"] -``` \ No newline at end of file +Pages = ["MultiField.jl","BlockPartitionedArrays.jl"] +``` From 84be1f543b59caff9998bad75204e6af1aec09e4 Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 2 Oct 2023 14:37:22 +1100 Subject: [PATCH 53/56] Minor changes to docs --- docs/make.jl | 4 ++-- src/Adaptivity.jl | 1 + src/BlockPartitionedArrays.jl | 3 ++- src/MultiField.jl | 1 - 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index c10f7e5f..2963f1d5 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,16 +11,16 @@ pages = [ "Geometry" => "Geometry.md", "MultiField" => "MultiField.md", "Visualization" => "Visualization.md", + "Adaptivity" => "Adaptivity.md", ] - makedocs(; modules=[GridapDistributed], format=Documenter.HTML(), pages=pages, repo="https://github.com/gridap/GridapDistributed.jl/blob/{commit}{path}#L{line}", sitename="GridapDistributed.jl", - authors="S. Badia , A. F. Martin , F. Verdugo ", + authors="S. Badia , A. F. Martin , F. Verdugo ", ) deploydocs(; diff --git a/src/Adaptivity.jl b/src/Adaptivity.jl index 0045fc63..1ce0d7ff 100644 --- a/src/Adaptivity.jl +++ b/src/Adaptivity.jl @@ -22,6 +22,7 @@ end RedistributeGlue Glue linking two distributions of the same mesh. 
+ - `new_parts`: Array with the new part IDs (and comms) - `old_parts`: Array with the old part IDs (and comms) - `parts_rcv`: Array with the part IDs from which each part receives diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl index e6f65215..5661432f 100644 --- a/src/BlockPartitionedArrays.jl +++ b/src/BlockPartitionedArrays.jl @@ -1,5 +1,6 @@ """ + struct BlockPRange{A} <: AbstractUnitRange{Int} """ struct BlockPRange{A} <: AbstractUnitRange{Int} ranges::Vector{PRange{A}} @@ -25,6 +26,7 @@ function Base.getindex(a::BlockPRange,inds::Block{1}) end """ + struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} """ struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} blocks::Array{A,N} @@ -361,4 +363,3 @@ function Base.materialize!(a::BlockPArray,b::BlockPBroadcasted) map(Base.materialize!,blocks(a),blocks(b)) return a end - diff --git a/src/MultiField.jl b/src/MultiField.jl index ed33e659..fa1f16e9 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -450,7 +450,6 @@ end # SparseMatrixAssembler API -#! The following could be avoided if DistributedBlockSparseMatrixAssembler <: DistributedSparseMatrixAssembler function FESpaces.symbolic_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) rows = get_rows(a) cols = get_cols(a) From 7457e58efa921c4e9a0a911179c21a23b7c2f5cc Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 2 Oct 2023 14:59:10 +1100 Subject: [PATCH 54/56] More fixes to docs --- docs/src/DivConformingFESpaces.md | 6 ------ docs/src/FESpaces.md | 2 +- src/DivConformingFESpaces.jl | 3 +-- 3 files changed, 2 insertions(+), 9 deletions(-) delete mode 100644 docs/src/DivConformingFESpaces.md diff --git a/docs/src/DivConformingFESpaces.md b/docs/src/DivConformingFESpaces.md deleted file mode 100644 index 1a4419e9..00000000 --- a/docs/src/DivConformingFESpaces.md +++ /dev/null @@ -1,6 +0,0 @@ -# DivConformingFESpaces - -```@autodocs -Modules = [GridapDistributed] -Pages = ["DivConformingFESpaces.jl"] -``` \ No newline at end of file diff --git a/docs/src/FESpaces.md b/docs/src/FESpaces.md index 83fc837c..37f10693 100644 --- a/docs/src/FESpaces.md +++ b/docs/src/FESpaces.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["FESpaces.jl"] +Pages = ["FESpaces.jl","DivConformingFESpaces.jl"] ``` \ No newline at end of file diff --git a/src/DivConformingFESpaces.jl b/src/DivConformingFESpaces.jl index bfd5f3dd..ac31e505 100644 --- a/src/DivConformingFESpaces.jl +++ b/src/DivConformingFESpaces.jl @@ -1,5 +1,4 @@ -""" -""" + function FESpaces.FESpace(model::DistributedDiscreteModel, reffe::Tuple{RaviartThomas,Any,Any}; conformity=nothing,kwargs...) From a5fb46e86cc3d69726d7462f9381f1cd693f2d7c Mon Sep 17 00:00:00 2001 From: JordiManyer Date: Mon, 2 Oct 2023 15:01:08 +1100 Subject: [PATCH 55/56] Minor --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index 2963f1d5..73d36b0a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -6,7 +6,6 @@ pages = [ "GridapDistributed" => "GridapDistributed.md", "Algebra" => "Algebra.md", "CellData" => "CellData.md", - "DivConformingFESpaces" => "DivConformingFESpaces.md", "FESpaces" => "FESpaces.md", "Geometry" => "Geometry.md", "MultiField" => "MultiField.md", @@ -21,6 +20,7 @@ makedocs(; repo="https://github.com/gridap/GridapDistributed.jl/blob/{commit}{path}#L{line}", sitename="GridapDistributed.jl", authors="S. Badia , A. F. Martin , F. 
Verdugo ",
+  # warnonly=true, # for debugging
 )
 
 deploydocs(;

From 90df49be8092ebc82718315408cbfe8503851dba Mon Sep 17 00:00:00 2001
From: JordiManyer 
Date: Mon, 2 Oct 2023 15:52:05 +1100
Subject: [PATCH 56/56] Updated NEWS.md

---
 NEWS.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 21bbe1f2..649e540e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,25 +9,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Added missing _get_cell_dof_ids_inner_space() method overload. Since PR[130](https://github.com/gridap/GridapDistributed.jl/pull/130).
-- Added missing remove_ghost_cells() overload for AdaptiveTriangulation. Since PR[131](https://github.com/gridap/GridapDistributed.jl/pull/131).
 - Added support for distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 - Add possibility to use `OwnAndGhostVector` as vector partition for `FESpace` dofs. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 - Implement `BlockPArray <: AbstractBlockArray`, a new type that behaves as a `BlockArray{PArray}` and which fulfills the APIs of both `PArray` and `AbstractBlockArray`. This new type will be used to implement distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 - `DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}` now has a `BlockPRange` as gids and `BlockPVector` as vector type. This is necessary to create consistency between fespace and system vectors, which in turn avoids memory allocations/copies when transferring between FESpace and linear system layouts. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 
+### Changed
+
+- Merged functionalities of `consistent_local_views` and `change_ghost`. `consistent_local_views` has been removed. `change_ghost` now has two kwargs `is_consistent` and `make_consistent` that take into consideration all possible use cases. `change_ghost` has also been optimized to avoid unnecessary allocations if possible. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+
 ## [0.3.1] - 2023-10-01
 
 ### Added
 
 - Added missing _get_cell_dof_ids_inner_space() method overload. Since PR[130](https://github.com/gridap/GridapDistributed.jl/pull/130).
 - Added missing remove_ghost_cells() overload for AdaptiveTriangulation. Since PR[131](https://github.com/gridap/GridapDistributed.jl/pull/131).
-- Added support for distributed block-assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 
 ### Changed
 
 - Updated compat for FillArrays to v1. Since PR[127](https://github.com/gridap/GridapDistributed.jl/pull/127).
-- Merged functionalities of `consistent_local_views` and `change_ghost`. `consistent_local_views` has been removed. `change_ghost` now has two kwargs `is_consistent` and `make_consistent` that take into consideration all possible use cases. `change_ghost` has also been optimized to avoid unnecessary allocations if possible. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
 
 ## [0.3.0] - 2023-08-16
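
A hedged end-to-end sketch of the user-facing features summarized in the NEWS entries above. `model` and `reffe` are assumed to be a distributed discrete model and a reference FE as built in the test files of this series; treat the snippet as an illustration of the intended API, not as canonical usage.

```julia
using Gridap, Gridap.MultiField, GridapDistributed, PartitionedArrays, LinearAlgebra

# (1) Store free dof values with a split own/ghost layout (OwnAndGhostVectors):
V = FESpace(model, reffe; dirichlet_tags="boundary", split_own_and_ghost=true)

# (2) Block multi-field space: the gids become a BlockPRange and the free
#     values a BlockPVector, so no copies are needed between FE and system layouts:
Yb = MultiFieldFESpace([V, V]; style=BlockMultiFieldStyle())
xb = zero_free_values(Yb)   # one PVector block per field

# (3) Block-wise linear algebra implemented for BlockPArray in this series:
fill!(xb, 1.0)
norm(xb, 1)                 # p-norms reduced block by block
sum(xb); maximum(abs, xb)   # block-wise reductions
rmul!(xb, 0.5)              # in-place scaling, block by block
```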