diff --git a/.gitignore b/.gitignore
index 91a3e882..4f9205f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 /dev/
 /docs/build/
 /docs/site/
+/docs/Manifest.toml
 /tmp/
 *.vtu
 *.pvtu
diff --git a/NEWS.md b/NEWS.md
index 9d399950..649e540e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,7 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
-### Added
+### Added
+
+- Added support for distributed block assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- Added the possibility to use `OwnAndGhostVectors` as the vector partition for `FESpace` dofs. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- Implemented `BlockPArray <: AbstractBlockArray`, a new type that behaves as a `BlockArray{PArray}` and fulfills the APIs of both `PArray` and `AbstractBlockArray`. This new type is used to implement distributed block assembly. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+- `DistributedMultiFieldFESpace{<:BlockMultiFieldStyle}` now has a `BlockPRange` as gids and a `BlockPVector` as vector type. This keeps the FESpace and system vectors consistent, which in turn avoids memory allocations/copies when transferring between the FESpace and linear-system layouts. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+
+### Changed
+
+- Merged the functionality of `consistent_local_views` and `change_ghost`. `consistent_local_views` has been removed. `change_ghost` now has two keyword arguments, `is_consistent` and `make_consistent`, that cover all use cases. `change_ghost` has also been optimized to avoid unnecessary allocations whenever possible. Since PR [124](https://github.com/gridap/GridapDistributed.jl/pull/124).
+
+## [0.3.1] - 2023-10-01
+
+### Added
 
 - Added missing _get_cell_dof_ids_inner_space() method overload. Since PR[130](https://github.com/gridap/GridapDistributed.jl/pull/130).
 - Added missing remove_ghost_cells() overload for AdaptiveTriangulation. Since PR[131](https://github.com/gridap/GridapDistributed.jl/pull/131).
diff --git a/Project.toml b/Project.toml
index 3d4c7ab4..62fbbf10 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@
authors = ["S. Badia ", "A. F. Martin "GridapDistributed.md",
   "Algebra" => "Algebra.md",
   "CellData" => "CellData.md",
-  "DivConformingFESpaces" => "DivConformingFESpaces.md",
   "FESpaces" => "FESpaces.md",
   "Geometry" => "Geometry.md",
   "MultiField" => "MultiField.md",
   "Visualization" => "Visualization.md",
+  "Adaptivity" => "Adaptivity.md",
 ]
-
 makedocs(;
   modules=[GridapDistributed],
   format=Documenter.HTML(),
   pages=pages,
   repo="https://github.com/gridap/GridapDistributed.jl/blob/{commit}{path}#L{line}",
   sitename="GridapDistributed.jl",
-  authors="S. Badia , A. F. Martin , F. Verdugo ",
+  authors="S. Badia , A. F. 
Verdugo ", + # warnonly=true, # for debugging ) deploydocs(; diff --git a/docs/src/Adaptivity.md b/docs/src/Adaptivity.md new file mode 100644 index 00000000..56ef09eb --- /dev/null +++ b/docs/src/Adaptivity.md @@ -0,0 +1,6 @@ +# Adaptivity + +```@autodocs +Modules = [GridapDistributed] +Pages = ["Adaptivity.jl"] +``` diff --git a/docs/src/DivConformingFESpaces.md b/docs/src/DivConformingFESpaces.md deleted file mode 100644 index 1a4419e9..00000000 --- a/docs/src/DivConformingFESpaces.md +++ /dev/null @@ -1,6 +0,0 @@ -# DivConformingFESpaces - -```@autodocs -Modules = [GridapDistributed] -Pages = ["DivConformingFESpaces.jl"] -``` \ No newline at end of file diff --git a/docs/src/FESpaces.md b/docs/src/FESpaces.md index 83fc837c..37f10693 100644 --- a/docs/src/FESpaces.md +++ b/docs/src/FESpaces.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["FESpaces.jl"] +Pages = ["FESpaces.jl","DivConformingFESpaces.jl"] ``` \ No newline at end of file diff --git a/docs/src/MultiField.md b/docs/src/MultiField.md index 4b7c2318..f1472582 100644 --- a/docs/src/MultiField.md +++ b/docs/src/MultiField.md @@ -2,5 +2,5 @@ ```@autodocs Modules = [GridapDistributed] -Pages = ["MultiField.jl"] -``` \ No newline at end of file +Pages = ["MultiField.jl","BlockPartitionedArrays.jl"] +``` diff --git a/src/Adaptivity.jl b/src/Adaptivity.jl index 0045fc63..1ce0d7ff 100644 --- a/src/Adaptivity.jl +++ b/src/Adaptivity.jl @@ -22,6 +22,7 @@ end RedistributeGlue Glue linking two distributions of the same mesh. + - `new_parts`: Array with the new part IDs (and comms) - `old_parts`: Array with the old part IDs (and comms) - `parts_rcv`: Array with the part IDs from which each part receives diff --git a/src/Algebra.jl b/src/Algebra.jl index 52d07d57..e8fedd02 100644 --- a/src/Algebra.jl +++ b/src/Algebra.jl @@ -1,3 +1,14 @@ + +# Vector allocation + +function Algebra.allocate_vector(::Type{<:PVector{V}},ids::PRange) where {V} + PVector{V}(undef,partition(ids)) +end + +function Algebra.allocate_vector(::Type{<:BlockPVector{V}},ids::BlockPRange) where {V} + BlockPVector{V}(undef,ids) +end + # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.ArrayCounter,axes) @notimplemented @@ -5,17 +16,36 @@ end # This might go to Gridap in the future. We keep it here for the moment. function change_axes(a::Algebra.CounterCOO{T,A}, axes::A) where {T,A} - b=Algebra.CounterCOO{T}(axes) + b = Algebra.CounterCOO{T}(axes) b.nnz = a.nnz b end # This might go to Gridap in the future. We keep it here for the moment. 
function change_axes(a::Algebra.AllocationCOO{T,A}, axes::A) where {T,A} - counter=change_axes(a.counter,axes) + counter = change_axes(a.counter,axes) Algebra.AllocationCOO(counter,a.I,a.J,a.V) end +# Array of PArrays -> PArray of Arrays +function to_parray_of_arrays(a::AbstractArray{<:MPIArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + PartitionedArrays.getany(aj) + end + end +end + +function to_parray_of_arrays(a::AbstractArray{<:DebugArray}) + indices = linear_indices(first(a)) + map(indices) do i + map(a) do aj + aj.items[i] + end + end +end + # This type is required because MPIArray from PArrays # cannot be instantiated with a NULL communicator struct MPIVoidVector{T} <: AbstractVector{T} @@ -112,8 +142,8 @@ function local_views(a) @abstractmethod end -function get_parts(x) - return linear_indices(local_views(x)) +function get_parts(a) + return linear_indices(local_views(a)) end function local_views(a::AbstractVector,rows) @@ -124,24 +154,56 @@ function local_views(a::AbstractMatrix,rows,cols) @notimplemented end -function consistent_local_views(a,ids,isconsistent) - @abstractmethod +local_views(a::AbstractArray) = a +local_views(a::PRange) = partition(a) +local_views(a::PVector) = partition(a) +local_views(a::PSparseMatrix) = partition(a) + +function local_views(a::BlockPRange) + map(blocks(a)) do a + local_views(a) + end |> to_parray_of_arrays end -function local_views(a::AbstractArray) - a +function local_views(a::BlockPArray) + vals = map(blocks(a)) do a + local_views(a) + end |> to_parray_of_arrays + return map(mortar,vals) +end + +# change_ghost + +function change_ghost(a::PVector{T},ids::PRange;is_consistent=false,make_consistent=false) where T + same_partition = (a.index_partition === partition(ids)) + a_new = same_partition ? 
a : change_ghost(T,a,ids) + if make_consistent && (!same_partition || !is_consistent) + consistent!(a_new) |> wait + end + return a_new end -function local_views(a::PRange) - partition(a) +function change_ghost(::Type{<:AbstractVector},a::PVector,ids::PRange) + a_new = similar(a,eltype(a),(ids,)) + # Equivalent to copy!(a_new,a) but does not check that owned indices match + map(copy!,own_values(a_new),own_values(a)) + return a_new end -function local_views(a::PVector) - partition(a) +function change_ghost(::Type{<:OwnAndGhostVectors},a::PVector,ids::PRange) + values = map(own_values(a),partition(ids)) do own_vals,ids + ghost_vals = fill(zero(eltype(a)),ghost_length(ids)) + perm = PartitionedArrays.local_permutation(ids) + OwnAndGhostVectors(own_vals,ghost_vals,perm) + end + return PVector(values,partition(ids)) end -function local_views(a::PSparseMatrix) - partition(a) +function change_ghost(a::BlockPVector,ids::BlockPRange;is_consistent=false,make_consistent=false) + vals = map(blocks(a),blocks(ids)) do a, ids + change_ghost(a,ids;is_consistent=is_consistent,make_consistent=make_consistent) + end + return BlockPVector(vals,ids) end # This function computes a mapping among the local identifiers of a and b @@ -199,71 +261,46 @@ function _lid_to_plid(lid,lid_to_plid) plid end -function local_views(row_partitioned_vector::PVector,test_dofs_partition::PRange) - if row_partitioned_vector.index_partition === partition(test_dofs_partition) - @assert false +function local_views(a::PVector,new_rows::PRange) + old_rows = axes(a,1) + if partition(old_rows) === partition(new_rows) + partition(a) else - map(partition(row_partitioned_vector), - partition(test_dofs_partition), - row_partitioned_vector.index_partition) do vector_partition,dofs_partition,row_partition - LocalView(vector_partition,(find_local_to_local_map(dofs_partition,row_partition),)) + map(partition(a),partition(old_rows),partition(new_rows)) do vector_partition,old_rows,new_rows + LocalView(vector_partition,(find_local_to_local_map(new_rows,old_rows),)) end end end -function local_views(row_col_partitioned_matrix::PSparseMatrix, - test_dofs_partition::PRange, - trial_dofs_partition::PRange) - if (row_col_partitioned_matrix.row_partition === partition(test_dofs_partition) || - row_col_partitioned_matrix.col_partition === partition(trial_dofs_partition) ) - @assert false - else - map( - partition(row_col_partitioned_matrix), - partition(test_dofs_partition), - partition(trial_dofs_partition), - row_col_partitioned_matrix.row_partition, - row_col_partitioned_matrix.col_partition) do matrix_partition, - test_dof_partition, - trial_dof_partition, - row_partition, - col_partition - rl2lmap = find_local_to_local_map(test_dof_partition,row_partition) - cl2lmap = find_local_to_local_map(trial_dof_partition,col_partition) - LocalView(matrix_partition,(rl2lmap,cl2lmap)) - end - end -end - -function change_ghost(a::PVector,ids_fespace::PRange) - if a.index_partition === partition(ids_fespace) - a_fespace = a +function local_views(a::PSparseMatrix,new_rows::PRange,new_cols::PRange) + old_rows, old_cols = axes(a) + if (partition(old_rows) === partition(new_rows) && partition(old_cols) === partition(new_cols) ) + partition(a) else - a_fespace = similar(a,eltype(a),(ids_fespace,)) - a_fespace .= a + map(partition(a), + partition(old_rows),partition(old_cols), + partition(new_rows),partition(new_cols)) do matrix_partition,old_rows,old_cols,new_rows,new_cols + rl2lmap = find_local_to_local_map(new_rows,old_rows) + cl2lmap = 
find_local_to_local_map(new_cols,old_cols) + LocalView(matrix_partition,(rl2lmap,cl2lmap)) + end end - a_fespace end -function consistent_local_views(a::PVector, - ids_fespace::PRange, - isconsistent) - a_fespace = change_ghost(a,ids_fespace) - if ! isconsistent - fetch_vector_ghost_values!(partition(a_fespace), - map(reverse,a_fespace.cache)) |> wait - end - partition(a_fespace) +function local_views(a::BlockPVector,new_rows::BlockPRange) + vals = map(local_views,blocks(a),blocks(new_rows)) |> to_parray_of_arrays + return map(mortar,vals) end -function Algebra.allocate_vector(::Type{<:PVector{V,A}},ids::PRange) where {V,A} - values = map(partition(ids)) do ids - Tv = eltype(A) - Tv(undef,length(local_to_owner(ids))) - end - PVector(values,partition(ids)) +function local_views(a::BlockPMatrix,new_rows::BlockPRange,new_cols::BlockPRange) + vals = map(CartesianIndices(blocksize(a))) do I + local_views(a[Block(I)],new_rows[Block(I[1])],new_cols[Block(I[2])]) + end |> to_parray_of_arrays + return map(mortar,vals) end +# PSparseMatrix assembly + struct FullyAssembledRows end struct SubAssembledRows end @@ -287,6 +324,12 @@ function Algebra.nz_counter( DistributedCounterCOO(builder.par_strategy,counters,test_dofs_gids_prange,trial_dofs_gids_prange) end +function Algebra.get_array_type(::PSparseMatrixBuilderCOO{Tv}) where Tv + T = eltype(Tv) + return PSparseMatrix{T} +end + + """ """ struct DistributedCounterCOO{A,B,C,D} <: GridapType @@ -322,7 +365,7 @@ function Algebra.nz_allocation(a::DistributedCounterCOO) DistributedAllocationCOO(a.par_strategy,allocs,a.test_dofs_gids_prange,a.trial_dofs_gids_prange) end -struct DistributedAllocationCOO{A,B,C,D} <:GridapType +struct DistributedAllocationCOO{A,B,C,D} <: GridapType par_strategy::A allocs::B test_dofs_gids_prange::C @@ -342,13 +385,23 @@ end function change_axes(a::DistributedAllocationCOO{A,B,<:PRange,<:PRange}, axes::Tuple{<:PRange,<:PRange}) where {A,B} - local_axes=map(partition(axes[1]),partition(axes[2])) do rows,cols + local_axes = map(partition(axes[1]),partition(axes[2])) do rows,cols (Base.OneTo(local_length(rows)), Base.OneTo(local_length(cols))) end - allocs=map(change_axes,a.allocs,local_axes) + allocs = map(change_axes,a.allocs,local_axes) DistributedAllocationCOO(a.par_strategy,allocs,axes[1],axes[2]) end +function change_axes(a::MatrixBlock{<:DistributedAllocationCOO}, + axes::Tuple{<:Vector,<:Vector}) + block_ids = CartesianIndices(a.array) + rows, cols = axes + array = map(block_ids) do I + change_axes(a[I],(rows[I[1]],cols[I[2]])) + end + return ArrayBlock(array,a.touched) +end + function local_views(a::DistributedAllocationCOO) a.allocs end @@ -359,23 +412,28 @@ function local_views(a::DistributedAllocationCOO,test_dofs_gids_prange,trial_dof a.allocs end -function first_gdof_from_ids(ids) - lid_to_gid=local_to_global(ids) - owner_to_lid=own_to_local(ids) - own_length(ids)>0 ? 
Int(lid_to_gid[first(owner_to_lid)]) : 1 +function local_views(a::MatrixBlock{<:DistributedAllocationCOO}) + array = map(local_views,a.array) |> to_parray_of_arrays + return map(ai -> ArrayBlock(ai,a.touched),array) end -function find_gid_and_owner(ighost_to_jghost,jindices) - jghost_to_local=ghost_to_local(jindices) - jlocal_to_global=local_to_global(jindices) - jlocal_to_owner=local_to_owner(jindices) - ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) +function get_allocations(a::DistributedAllocationCOO) + I,J,V = map(local_views(a)) do alloc + alloc.I, alloc.J, alloc.V + end |> tuple_of_arrays + return I,J,V +end - ighost_to_global = jlocal_to_global[ighost_to_jlocal] - ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] - ighost_to_global, ighost_to_owner +function get_allocations(a::ArrayBlock{<:DistributedAllocationCOO}) + tuple_of_array_of_parrays = map(get_allocations,a.array) |> tuple_of_arrays + return tuple_of_array_of_parrays end +get_test_gids(a::DistributedAllocationCOO) = a.test_dofs_gids_prange +get_trial_gids(a::DistributedAllocationCOO) = a.trial_dofs_gids_prange +get_test_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_test_gids,diag(a.array)) +get_trial_gids(a::ArrayBlock{<:DistributedAllocationCOO}) = map(get_trial_gids,diag(a.array)) + function Algebra.create_from_nz(a::PSparseMatrix) # For FullyAssembledRows the underlying Exchanger should # not have ghost layer making assemble! do nothing (TODO check) @@ -386,218 +444,76 @@ end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:FullyAssembledRows}) f(x) = nothing A, = _fa_create_from_nz_with_callback(f,a) - A + return A end -# The given ids are assumed to be a sub-set of the lids -function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) - i = 0 - ghost_lids_touched = fill(false,ghost_length(a)) - glo_to_loc=global_to_local(a) - loc_to_gho=local_to_ghost(a) - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - i += 1 - end - end - gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) - i = 0 - ghost_lids_touched .= false - for gid in gids - lid = glo_to_loc[gid] - ghost_lid = loc_to_gho[lid] - if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] - ghost_lids_touched[ghost_lid] = true - i += 1 - gids_ghost_lid_to_ghost_lid[i] = ghost_lid - end - end - gids_ghost_lid_to_ghost_lid +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:FullyAssembledRows}}) + f(x) = nothing + A, = _fa_create_from_nz_with_callback(f,a) + return A end -# Find the neighbours of partition1 trying -# to use those in partition2 as a hint -function _find_neighbours!(partition1, partition2) - partition2_snd, partition2_rcv = assembly_neighbors(partition2) - partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) - assembly_neighbors(partition1; neighbors=partition2_graph) -end - function _fa_create_from_nz_with_callback(callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngcdofs = length(trial_dofs_gids_prange) - nocdofs = map(own_length,trial_dofs_gids_partition) - - rows = _setup_prange_rows_without_ghosts(test_dofs_gids_prange) + I,J,V = get_allocations(a) + 
test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) + rows = _setup_prange(test_dofs_gids_prange,I;ghost=false,ax=:rows) b = callback(rows) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the range for cols - # Note that we are calling here the _setup_rows_prange(...) function even though we - # are setting up the range for the cols. Inherent to the FullyAssembledRows() - # assembly strategy with a single layer of ghost cells - # is the fact that "The global column identifiers of matrix entries - # located in rows that a given processor owns have to be such they belong to the set of - # ghost DoFs in the local partition of the FE space corresponding to such processor." - # Any entry in the global matrix sparsity pattern that fulfills this condition is just - # ignored in the FullyAssembledRows() assembly strategy with a single layer of ghost cells - cols = _setup_rows_prange(trial_dofs_gids_prange,J) - + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress local portions - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + A = _setup_matrix(values,rows,cols) + return A, b end function Algebra.create_from_nz(a::DistributedAllocationCOO{<:SubAssembledRows}) f(x) = nothing A, = _sa_create_from_nz_with_callback(f,f,a) - A + return A end -function _generate_column_owner(J,trial_dofs_partition) - map(J,trial_dofs_partition) do J, indices - glo_to_loc=global_to_local(indices) - loc_to_own=local_to_owner(indices) - map(x->loc_to_own[glo_to_loc[x]], J) - end -end - -function assemble_coo_with_column_owner!(I,J,Jown,V,row_partition) - """ - Returns three JaggedArrays with the coo triplets - to be sent to the corresponding owner parts in parts_snd - """ - function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) - global_to_local_row = global_to_local(row_lids) - local_row_to_owner = local_to_owner(row_lids) - owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) - ptrs = zeros(Int32,length(parts_snd)+1) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - ptrs[owner_to_i[owner]+1] +=1 - end - end - PArrays.length_to_ptrs!(ptrs) - gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) - gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) - jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) - v_snd_data = zeros(eltype(k_v),ptrs[end]-1) - for k in 1:length(k_gi) - gi = k_gi[k] - li = global_to_local_row[gi] - owner = local_row_to_owner[li] - if owner != part - gj = k_gj[k] - v = k_v[k] - p = ptrs[owner_to_i[owner]] - gi_snd_data[p] = gi - gj_snd_data[p] = gj - jo_snd_data[p] = k_jo[k] - v_snd_data[p] = v - k_v[k] = zero(v) - ptrs[owner_to_i[owner]] += 1 - end - end - PArrays.rewind_ptrs!(ptrs) - gi_snd = JaggedArray(gi_snd_data,ptrs) - gj_snd = JaggedArray(gj_snd_data,ptrs) - jo_snd 
= JaggedArray(jo_snd_data,ptrs) - v_snd = JaggedArray(v_snd_data,ptrs) - gi_snd, gj_snd, jo_snd, v_snd - end - """ - Pushes to coo_entries_with_column_owner the tuples - gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes - """ - function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner - current_n = length(k_gi) - new_n = current_n + length(gi_rcv.data) - resize!(k_gi,new_n) - resize!(k_gj,new_n) - resize!(k_jo,new_n) - resize!(k_v,new_n) - for p in 1:length(gi_rcv.data) - k_gi[current_n+p] = gi_rcv.data[p] - k_gj[current_n+p] = gj_rcv.data[p] - k_jo[current_n+p] = jo_rcv.data[p] - k_v[current_n+p] = v_rcv.data[p] - end - end - part = linear_indices(row_partition) - parts_snd, parts_rcv = assembly_neighbors(row_partition) - coo_entries_with_column_owner = map(tuple,I,J,Jown,V) - gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays - graph = ExchangeGraph(parts_snd,parts_rcv) - t1 = exchange(gi_snd,graph) - t2 = exchange(gj_snd,graph) - t3 = exchange(jo_snd,graph) - t4 = exchange(v_snd,graph) - @async begin - gi_rcv = fetch(t1) - gj_rcv = fetch(t2) - jo_rcv = fetch(t3) - v_rcv = fetch(t4) - map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) - I,J,Jown,V - end +function Algebra.create_from_nz(a::ArrayBlock{<:DistributedAllocationCOO{<:SubAssembledRows}}) + f(x) = nothing + A, = _sa_create_from_nz_with_callback(f,f,a) + return A end - function _sa_create_from_nz_with_callback(callback,async_callback,a) # Recover some data - I,J,V = map(a.allocs) do alloc - alloc.I, alloc.J, alloc.V - end |> tuple_of_arrays - test_dofs_gids_prange = a.test_dofs_gids_prange - trial_dofs_gids_prange = a.trial_dofs_gids_prange - test_dofs_gids_partition = partition(test_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - ngrdofs = length(test_dofs_gids_prange) - ngcdofs = length(test_dofs_gids_prange) + I,J,V = get_allocations(a) + test_dofs_gids_prange = get_test_gids(a) + trial_dofs_gids_prange = get_trial_gids(a) # convert I and J to global dof ids - map(to_global!,I,test_dofs_gids_partition) - map(to_global!,J,trial_dofs_gids_partition) + to_global_indices!(I,test_dofs_gids_prange;ax=:rows) + to_global_indices!(J,trial_dofs_gids_prange;ax=:cols) # Create the Prange for the rows - rows = _setup_rows_prange(test_dofs_gids_prange,I) + rows = _setup_prange(test_dofs_gids_prange,I;ax=:rows) # Move (I,J,V) triplets to the owner process of each row I. 
# The triplets are accompanyed which Jo which is the process column owner - Jo=_generate_column_owner(J,trial_dofs_gids_partition) - t=assemble_coo_with_column_owner!(I,J,Jo,V,partition(rows)) + Jo = get_gid_owners(J,trial_dofs_gids_prange;ax=:cols) + t = _assemble_coo!(I,J,V,rows;owners=Jo) # Here we can overlap computations # This is a good place to overlap since @@ -608,32 +524,34 @@ function _sa_create_from_nz_with_callback(callback,async_callback,a) wait(t) # Create the Prange for the cols - cols = _setup_cols_prange(trial_dofs_gids_prange,J,Jo) + cols = _setup_prange(trial_dofs_gids_prange,J;ax=:cols,owners=Jo) # Overlap rhs communications with CSC compression t2 = async_callback(b) # Convert again I,J to local numeration - map(to_local!,I,partition(rows)) - map(to_local!,J,partition(cols)) + to_local_indices!(I,rows;ax=:rows) + to_local_indices!(J,cols;ax=:cols) # Adjust local matrix size to linear system's index sets - asys=change_axes(a,(rows,cols)) + asys = change_axes(a,(rows,cols)) # Compress the local matrices - values = map(create_from_nz,asys.allocs) + values = map(create_from_nz,local_views(asys)) # Wait the transfer to finish - if t2 !== nothing + if !isa(t2,Nothing) wait(t2) end # Finally build the matrix - A = PSparseMatrix(values,partition(rows),partition(cols)) - - A, b + A = _setup_matrix(values,rows,cols) + return A, b end + +# PVector assembly + struct PVectorBuilder{T,B} local_vector_type::Type{T} par_strategy::B @@ -649,6 +567,11 @@ function Algebra.nz_counter(builder::PVectorBuilder,axs::Tuple{<:PRange}) PVectorCounter(builder.par_strategy,counters,rows) end +function Algebra.get_array_type(::PVectorBuilder{Tv}) where Tv + T = eltype(Tv) + return PVector{T} +end + struct PVectorCounter{A,B,C} par_strategy::A counters::B @@ -687,91 +610,8 @@ function local_views(a::PVectorAllocationTrackOnlyValues,rows) a.values end -# Create PRange for the rows of the linear system -# without local ghost dofs as per required in the -# FullyAssembledRows() parallel assembly strategy -function _setup_prange_rows_without_ghosts(test_dofs_gids_prange) - ngdofs = length(test_dofs_gids_prange) - test_dofs_gids_partition = partition(test_dofs_gids_prange) - nodofs = map(own_length,test_dofs_gids_partition) - rindices=map(test_dofs_gids_partition) do dofs_indices - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,Int64[],Int32[]) - OwnAndGhostIndices(own_indices,ghost_indices) - end - PRange(rindices) -end - -function _setup_rows_prange(test_dofs_gids_prange,I) - ngdofs = length(test_dofs_gids_prange) - test_dofs_gids_partition = partition(test_dofs_gids_prange) - I_ghost_lids_to_test_dofs_ghost_lids = map(ghost_lids_touched,test_dofs_gids_partition,I) - _setup_rows_prange_impl_(ngdofs,I_ghost_lids_to_test_dofs_ghost_lids,test_dofs_gids_partition) -end - -function _setup_rows_prange_impl_(ngdofs,I_ghost_lids_to_dofs_ghost_lids,test_dofs_gids_partition) - gids_ghost_to_global, gids_ghost_to_owner = map( - find_gid_and_owner,I_ghost_lids_to_dofs_ghost_lids,test_dofs_gids_partition) |> tuple_of_arrays - - indices=map(test_dofs_gids_partition, - gids_ghost_to_global, - gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) - OwnAndGhostIndices(own_indices,ghost_indices) - end - # Here we are assuming that the sparse communication 
graph underlying test_dofs_gids_partition - # is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. - # The precondition required for the consistency of any parallel assembly process in GridapDistributed - # is that each processor can determine locally with a single layer of ghost cells the global indices and associated - # processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities - # (i.e., either cells or faces). - _find_neighbours!(indices, test_dofs_gids_partition) - PRange(indices) -end - -function _setup_cols_prange(trial_dofs_gids_prange,J,Jown) - ngdofs = length(trial_dofs_gids_prange) - trial_dofs_gids_partition = partition(trial_dofs_gids_prange) - - J_ghost_to_global, J_ghost_to_owner = map(J,Jown,trial_dofs_gids_partition) do J, Jown, trial_dofs_gids_partition - ghost_touched=Dict{Int,Bool}() - ghost_to_global=Int64[] - ghost_to_owner=Int64[] - me=part_id(trial_dofs_gids_partition) - for (j,jo) in zip(J,Jown) - if jo!=me - if !haskey(ghost_touched,j) - push!(ghost_to_global,j) - push!(ghost_to_owner,jo) - ghost_touched[j]=true - end - end - end - ghost_to_global, ghost_to_owner - end |> tuple_of_arrays - - indices=map(trial_dofs_gids_partition, - J_ghost_to_global, - J_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner - owner = part_id(dofs_indices) - own_indices=OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) - ghost_indices=GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) - OwnAndGhostIndices(own_indices,ghost_indices) - end - # Here we cannot assume that the sparse communication graph underlying - # trial_dofs_gids_partition is a superset of the one underlying indices. - # Here we chould check whether it is included and call _find_neighbours!() - # if this is the case. At present, we are not taking advantage of this, - # but let the parallel scalable algorithm to compute the reciprocal to do the - # job. 
- PRange(indices) -end - function Algebra.create_from_nz(a::PVectorAllocationTrackOnlyValues{<:FullyAssembledRows}) - rows = _setup_prange_rows_without_ghosts(a.test_dofs_gids_prange) + rows = _setup_prange_without_ghosts(a.test_dofs_gids_prange) _rhs_callback(a,rows) end @@ -818,7 +658,7 @@ end function Algebra.create_from_nz(a::PVector) assemble!(a) |> wait - a + return a end function Algebra.create_from_nz( @@ -830,7 +670,7 @@ function Algebra.create_from_nz( end A,b = _fa_create_from_nz_with_callback(callback,a) - A,b + return A,b end struct PVectorAllocationTrackTouchedAndValues{A,B,C} @@ -853,7 +693,7 @@ function Algebra.create_from_nz( end A,b = _sa_create_from_nz_with_callback(callback,async_callback,a) - A,b + return A,b end struct ArrayAllocationTrackTouchedAndValues{A} @@ -877,7 +717,7 @@ end if !(a.touched[i]) a.touched[i]=true end - if v!=nothing + if !isa(v,Nothing) a.values[i]=c(v,a.values[i]) end end @@ -887,7 +727,7 @@ end @notimplemented end @inline function Arrays.add_entries!(c::Function,a::ArrayAllocationTrackTouchedAndValues,v,i) - if v != nothing + if !isa(v,Nothing) for (ve,ie) in zip(v,i) Arrays.add_entry!(c,a,ve,ie) end @@ -901,11 +741,11 @@ end function Arrays.nz_allocation(a::DistributedCounterCOO{<:SubAssembledRows}, b::PVectorCounter{<:SubAssembledRows}) - A = nz_allocation(a) - dofs = b.test_dofs_gids_prange + A = nz_allocation(a) + dofs = b.test_dofs_gids_prange values = map(nz_allocation,b.counters) - B=PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) - A,B + B = PVectorAllocationTrackOnlyValues(b.par_strategy,values,dofs) + return A,B end function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) @@ -914,10 +754,10 @@ function Arrays.nz_allocation(a::PVectorCounter{<:SubAssembledRows}) touched = map(values) do values fill!(Vector{Bool}(undef,length(values)),false) end - allocations=map(values,touched) do values,touched + allocations = map(values,touched) do values,touched ArrayAllocationTrackTouchedAndValues(touched,values) end - PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) + return PVectorAllocationTrackTouchedAndValues(allocations,values,dofs) end function local_views(a::PVectorAllocationTrackTouchedAndValues) @@ -926,36 +766,385 @@ end function Algebra.create_from_nz(a::PVectorAllocationTrackTouchedAndValues) test_dofs_prange = a.test_dofs_gids_prange # dof ids of the test space - test_dofs_prange_partition = partition(test_dofs_prange) ngrdofs = length(test_dofs_prange) - + # Find the ghost rows - I_ghost_lids_to_dofs_ghost_lids=map(local_views(a.allocations),test_dofs_prange_partition) do allocation, indices - dofs_lids_touched=findall(allocation.touched) + allocations = local_views(a.allocations) + indices = partition(test_dofs_prange) + I_ghost_lids_to_dofs_ghost_lids = map(allocations, indices) do allocation, indices + dofs_lids_touched = findall(allocation.touched) loc_to_gho = local_to_ghost(indices) n_I_ghost_lids = count((x)->loc_to_gho[x]!=0,dofs_lids_touched) I_ghost_lids = Vector{Int32}(undef,n_I_ghost_lids) - cur=1 + cur = 1 for lid in dofs_lids_touched - dof_lid=loc_to_gho[lid] - if dof_lid!=0 - I_ghost_lids[cur]=dof_lid - cur=cur+1 + dof_lid = loc_to_gho[lid] + if dof_lid != 0 + I_ghost_lids[cur] = dof_lid + cur = cur+1 end end I_ghost_lids end - rows = _setup_rows_prange_impl_(ngrdofs, - I_ghost_lids_to_dofs_ghost_lids, - test_dofs_prange_partition) + gids_ghost_to_global, gids_ghost_to_owner = map( + find_gid_and_owner,I_ghost_lids_to_dofs_ghost_lids,indices) |> tuple_of_arrays + + rows = 
_setup_prange_impl_(ngrdofs,indices,gids_ghost_to_global,gids_ghost_to_owner) + b = _rhs_callback(a,rows) + t2 = assemble!(b) + + # Wait the transfer to finish + if t2 !== nothing + wait(t2) + end + return b +end + + +# Common Assembly Utilities + +function first_gdof_from_ids(ids) + if own_length(ids) == 0 + return 1 + end + lid_to_gid = local_to_global(ids) + owned_to_lid = own_to_local(ids) + return Int(lid_to_gid[first(owned_to_lid)]) +end + +function find_gid_and_owner(ighost_to_jghost,jindices) + jghost_to_local = ghost_to_local(jindices) + jlocal_to_global = local_to_global(jindices) + jlocal_to_owner = local_to_owner(jindices) + ighost_to_jlocal = view(jghost_to_local,ighost_to_jghost) + + ighost_to_global = jlocal_to_global[ighost_to_jlocal] + ighost_to_owner = jlocal_to_owner[ighost_to_jlocal] + return ighost_to_global, ighost_to_owner +end + +# The given ids are assumed to be a sub-set of the lids +function ghost_lids_touched(a::AbstractLocalIndices,gids::AbstractVector{<:Integer}) + glo_to_loc = global_to_local(a) + loc_to_gho = local_to_ghost(a) + + # First pass: Allocate + i = 0 + ghost_lids_touched = fill(false,ghost_length(a)) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + i += 1 + end + end + gids_ghost_lid_to_ghost_lid = Vector{Int32}(undef,i) + + # Second pass: fill + i = 1 + fill!(ghost_lids_touched,false) + for gid in gids + lid = glo_to_loc[gid] + ghost_lid = loc_to_gho[lid] + if ghost_lid > 0 && !ghost_lids_touched[ghost_lid] + ghost_lids_touched[ghost_lid] = true + gids_ghost_lid_to_ghost_lid[i] = ghost_lid + i += 1 + end + end + + return gids_ghost_lid_to_ghost_lid +end + +# Find the neighbours of partition1 trying +# to use those in partition2 as a hint +function _find_neighbours!(partition1, partition2) + partition2_snd, partition2_rcv = assembly_neighbors(partition2) + partition2_graph = ExchangeGraph(partition2_snd, partition2_rcv) + return assembly_neighbors(partition1; neighbors=partition2_graph) +end + +# to_global! & to_local! analogs, needed for dispatching in block assembly + +function to_local_indices!(I,ids::PRange;kwargs...) + map(to_local!,I,partition(ids)) +end + +function to_global_indices!(I,ids::PRange;kwargs...) + map(to_global!,I,partition(ids)) +end + +function get_gid_owners(I,ids::PRange;kwargs...) + map(I,partition(ids)) do I, indices + glo_to_loc = global_to_local(indices) + loc_to_own = local_to_owner(indices) + map(x->loc_to_own[glo_to_loc[x]], I) + end +end + +for f in [:to_local_indices!, :to_global_indices!, :get_gid_owners] + @eval begin + function $f(I::Vector,ids::AbstractVector{<:PRange};kwargs...) + map($f,I,ids) + end + + function $f(I::Matrix,ids::AbstractVector{<:PRange};ax=:rows) + @check ax ∈ [:rows,:cols] + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if ax == :rows + $f(I[i,j],ids[i]) + else + $f(I[i,j],ids[j]) + end + end + end + end +end + +# _setup_matrix : local matrices + global PRanges -> Global matrix + +function _setup_matrix(values,rows::PRange,cols::PRange) + return PSparseMatrix(values,partition(rows),partition(cols)) +end + +function _setup_matrix(values,rows::Vector{<:PRange},cols::Vector{<:PRange}) + block_ids = CartesianIndices((length(rows),length(cols))) + block_mats = map(block_ids) do I + block_values = map(v -> blocks(v)[I],values) + return _setup_matrix(block_values,rows[I[1]],cols[I[2]]) + end + return mortar(block_mats) +end + +# _assemble_coo! 
: local coo triplets + global PRange -> Global coo values - b = _rhs_callback(a,rows) - t2 = assemble!(b) +function _assemble_coo!(I,J,V,rows::PRange;owners=nothing) + if isa(owners,Nothing) + PArrays.assemble_coo!(I,J,V,partition(rows)) + else + assemble_coo_with_column_owner!(I,J,V,partition(rows),owners) + end +end - # Wait the transfer to finish - if t2 !== nothing - wait(t2) - end - b +function _assemble_coo!(I,J,V,rows::Vector{<:PRange};owners=nothing) + block_ids = CartesianIndices(I) + map(block_ids) do id + i = id[1]; j = id[2]; + if isa(owners,Nothing) + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i]) + else + _assemble_coo!(I[i,j],J[i,j],V[i,j],rows[i],owners=owners[i,j]) + end + end +end + +function assemble_coo_with_column_owner!(I,J,V,row_partition,Jown) + """ + Returns three JaggedArrays with the coo triplets + to be sent to the corresponding owner parts in parts_snd + """ + function setup_snd(part,parts_snd,row_lids,coo_entries_with_column_owner) + global_to_local_row = global_to_local(row_lids) + local_row_to_owner = local_to_owner(row_lids) + owner_to_i = Dict(( owner=>i for (i,owner) in enumerate(parts_snd) )) + ptrs = zeros(Int32,length(parts_snd)+1) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + ptrs[owner_to_i[owner]+1] +=1 + end + end + PArrays.length_to_ptrs!(ptrs) + gi_snd_data = zeros(eltype(k_gi),ptrs[end]-1) + gj_snd_data = zeros(eltype(k_gj),ptrs[end]-1) + jo_snd_data = zeros(eltype(k_jo),ptrs[end]-1) + v_snd_data = zeros(eltype(k_v),ptrs[end]-1) + for k in 1:length(k_gi) + gi = k_gi[k] + li = global_to_local_row[gi] + owner = local_row_to_owner[li] + if owner != part + gj = k_gj[k] + v = k_v[k] + p = ptrs[owner_to_i[owner]] + gi_snd_data[p] = gi + gj_snd_data[p] = gj + jo_snd_data[p] = k_jo[k] + v_snd_data[p] = v + k_v[k] = zero(v) + ptrs[owner_to_i[owner]] += 1 + end + end + PArrays.rewind_ptrs!(ptrs) + gi_snd = JaggedArray(gi_snd_data,ptrs) + gj_snd = JaggedArray(gj_snd_data,ptrs) + jo_snd = JaggedArray(jo_snd_data,ptrs) + v_snd = JaggedArray(v_snd_data,ptrs) + gi_snd, gj_snd, jo_snd, v_snd + end + """ + Pushes to coo_entries_with_column_owner the tuples + gi_rcv,gj_rcv,jo_rcv,v_rcv received from remote processes + """ + function setup_rcv!(coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + k_gi, k_gj, k_jo, k_v = coo_entries_with_column_owner + current_n = length(k_gi) + new_n = current_n + length(gi_rcv.data) + resize!(k_gi,new_n) + resize!(k_gj,new_n) + resize!(k_jo,new_n) + resize!(k_v,new_n) + for p in 1:length(gi_rcv.data) + k_gi[current_n+p] = gi_rcv.data[p] + k_gj[current_n+p] = gj_rcv.data[p] + k_jo[current_n+p] = jo_rcv.data[p] + k_v[current_n+p] = v_rcv.data[p] + end + end + part = linear_indices(row_partition) + parts_snd, parts_rcv = assembly_neighbors(row_partition) + coo_entries_with_column_owner = map(tuple,I,J,Jown,V) + gi_snd, gj_snd, jo_snd, v_snd = map(setup_snd,part,parts_snd,row_partition,coo_entries_with_column_owner) |> tuple_of_arrays + graph = ExchangeGraph(parts_snd,parts_rcv) + t1 = exchange(gi_snd,graph) + t2 = exchange(gj_snd,graph) + t3 = exchange(jo_snd,graph) + t4 = exchange(v_snd,graph) + @async begin + gi_rcv = fetch(t1) + gj_rcv = fetch(t2) + jo_rcv = fetch(t3) + v_rcv = fetch(t4) + map(setup_rcv!,coo_entries_with_column_owner,gi_rcv,gj_rcv,jo_rcv,v_rcv) + I,J,Jown,V + end +end + +# dofs_gids_prange can be either test_dofs_gids_prange or trial_dofs_gids_prange +# In the former case, 
gids is a vector of global test dof identifiers, while in the +# latter, a vector of global trial dof identifiers +function _setup_prange(dofs_gids_prange::PRange,gids;ghost=true,owners=nothing,kwargs...) + if !ghost + _setup_prange_without_ghosts(dofs_gids_prange) + elseif isa(owners,Nothing) + _setup_prange_with_ghosts(dofs_gids_prange,gids) + else + _setup_prange_with_ghosts(dofs_gids_prange,gids,owners) + end +end + +function _setup_prange(dofs_gids_prange::AbstractVector{<:PRange}, + gids::AbstractMatrix; + ax=:rows,ghost=true,owners=nothing) + @check ax ∈ (:rows,:cols) + block_ids = LinearIndices(dofs_gids_prange) + + gids_ax_slice, _owners = map(block_ids,dofs_gids_prange) do id,prange + gids_ax_slice = (ax == :rows) ? gids[id,:] : gids[:,id] + _owners = nothing + if ghost + gids_ax_slice = map(x -> union(x...), to_parray_of_arrays(gids_ax_slice)) + if !isa(owners,Nothing) # Recompute owners for the union + _owners = get_gid_owners(gids_ax_slice,prange) + end + end + return gids_ax_slice, _owners + end |> tuple_of_arrays + + return map((p,g,o) -> _setup_prange(p,g;ghost=ghost,owners=o),dofs_gids_prange,gids_ax_slice,_owners) +end + +# Create PRange for the rows of the linear system +# without local ghost dofs as per required in the +# FullyAssembledRows() parallel assembly strategy +function _setup_prange_without_ghosts(dofs_gids_prange::PRange) + ngdofs = length(dofs_gids_prange) + indices = map(partition(dofs_gids_prange)) do dofs_indices + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,Int64[],Int32[]) + OwnAndGhostIndices(own_indices,ghost_indices) + end + return PRange(indices) +end + +# Here we are assuming that the sparse communication graph underlying test_dofs_gids_partition +# is a superset of the one underlying indices. This is (has to be) true for the rows of the linear system. +# The precondition required for the consistency of any parallel assembly process in GridapDistributed +# is that each processor can determine locally with a single layer of ghost cells the global indices and associated +# processor owners of the rows that it touches after assembly of integration terms posed on locally-owned entities +# (i.e., either cells or faces). +function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> dof PRange ghost ids + gids_ghost_lids_to_dofs_ghost_lids = map(ghost_lids_touched,dofs_gids_partition,gids) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + find_gid_and_owner,gids_ghost_lids_to_dofs_ghost_lids,dofs_gids_partition) |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs,dofs_gids_partition,gids_ghost_to_global,gids_ghost_to_owner) +end + +# Here we cannot assume that the sparse communication graph underlying +# trial_dofs_gids_partition is a superset of the one underlying indices. +# Here we chould check whether it is included and call _find_neighbours!() +# if this is the case. At present, we are not taking advantage of this, +# but let the parallel scalable algorithm to compute the reciprocal to do the job. 
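(Aside, before the owner-aware variant below: every `_setup_prange_*` helper ultimately builds, for each part, a local index set made of the owned dofs plus the ghost dofs touched during assembly. A hand-built sketch of one such per-part index set, using the PartitionedArrays v0.3 index types already employed above; the numbers are made up.)

```julia
using PartitionedArrays

ngdofs          = 10                # global number of dofs
owner           = 1                 # id of this part
own_to_global   = [1,2,3,4,5]       # dofs owned by this part
ghost_to_global = [6,8]             # ghost dofs touched during assembly
ghost_to_owner  = Int32[2,2]        # parts owning those ghosts

own       = OwnIndices(ngdofs,owner,own_to_global)
ghost     = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner)
local_ids = OwnAndGhostIndices(own,ghost)
# One such object per part, collected over the partition, is what gets
# wrapped into the final PRange.
```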
+function _setup_prange_with_ghosts(dofs_gids_prange::PRange,gids,owners) + ngdofs = length(dofs_gids_prange) + dofs_gids_partition = partition(dofs_gids_prange) + + # Selected ghost ids -> [global dof ids, owner processor ids] + gids_ghost_to_global, gids_ghost_to_owner = map( + gids,owners,dofs_gids_partition) do gids, owners, indices + ghost_touched = Dict{Int,Bool}() + ghost_to_global = Int64[] + ghost_to_owner = Int64[] + me = part_id(indices) + for (j,jo) in zip(gids,owners) + if jo != me + if !haskey(ghost_touched,j) + push!(ghost_to_global,j) + push!(ghost_to_owner,jo) + ghost_touched[j] = true + end + end + end + ghost_to_global, ghost_to_owner + end |> tuple_of_arrays + + return _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=false) +end + +function _setup_prange_impl_(ngdofs, + dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner; + discover_neighbours=true) + indices = map(dofs_gids_partition, + gids_ghost_to_global, + gids_ghost_to_owner) do dofs_indices, ghost_to_global, ghost_to_owner + owner = part_id(dofs_indices) + own_indices = OwnIndices(ngdofs,owner,own_to_global(dofs_indices)) + ghost_indices = GhostIndices(ngdofs,ghost_to_global,ghost_to_owner) + OwnAndGhostIndices(own_indices,ghost_indices) + end + if discover_neighbours + _find_neighbours!(indices,dofs_gids_partition) + end + return PRange(indices) end diff --git a/src/BlockPartitionedArrays.jl b/src/BlockPartitionedArrays.jl new file mode 100644 index 00000000..5661432f --- /dev/null +++ b/src/BlockPartitionedArrays.jl @@ -0,0 +1,365 @@ + +""" + struct BlockPRange{A} <: AbstractUnitRange{Int} +""" +struct BlockPRange{A} <: AbstractUnitRange{Int} + ranges::Vector{PRange{A}} + function BlockPRange(ranges::Vector{<:PRange{A}}) where A + new{A}(ranges) + end +end + +Base.first(a::BlockPRange) = 1 +Base.last(a::BlockPRange) = sum(map(last,a.ranges)) + +BlockArrays.blocklength(a::BlockPRange) = length(a.ranges) +BlockArrays.blocksize(a::BlockPRange) = (blocklength(a),) +BlockArrays.blockaxes(a::BlockPRange) = (Block.(Base.OneTo(blocklength(a))),) +BlockArrays.blocks(a::BlockPRange) = a.ranges + +function PartitionedArrays.partition(a::BlockPRange) + return map(partition,blocks(a)) |> to_parray_of_arrays +end + +function Base.getindex(a::BlockPRange,inds::Block{1}) + a.ranges[inds.n...] 
+end + +""" + struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} +""" +struct BlockPArray{V,T,N,A,B} <: BlockArrays.AbstractBlockArray{T,N} + blocks::Array{A,N} + axes::NTuple{N,B} + + function BlockPArray(blocks::Array{<:AbstractArray{T,N},N}, + axes ::NTuple{N,<:BlockPRange}) where {T,N} + @check all(map(d->size(blocks,d)==blocklength(axes[d]),1:N)) + local_type(::Type{<:PVector{V}}) where V = V + local_type(::Type{<:PSparseMatrix{V}}) where V = V + A = eltype(blocks) + B = typeof(first(axes)) + V = local_type(A) + new{V,T,N,A,B}(blocks,axes) + end +end + +const BlockPVector{V,T,A,B} = BlockPArray{V,T,1,A,B} +const BlockPMatrix{V,T,A,B} = BlockPArray{V,T,2,A,B} + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::BlockPRange) + BlockPArray(blocks,(rows,)) +end + +@inline function BlockPVector(blocks::Vector{<:PVector},rows::Vector{<:PRange}) + BlockPVector(blocks,BlockPRange(rows)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::BlockPRange,cols::BlockPRange) + BlockPArray(blocks,(rows,cols)) +end + +@inline function BlockPMatrix(blocks::Matrix{<:PSparseMatrix},rows::Vector{<:PRange},cols::Vector{<:PRange}) + BlockPMatrix(blocks,BlockPRange(rows),BlockPRange(cols)) +end + +function BlockPVector{V}(::UndefInitializer,rows::BlockPRange) where {V} + vals = map(blocks(rows)) do r + PVector{V}(undef,partition(r)) + end + return BlockPVector(vals,rows) +end + +function BlockPMatrix{V}(::UndefInitializer,rows::BlockPRange,cols::BlockPRange) where {V} + block_ids = CartesianIndices((blocklength(rows),blocklength(cols))) + block_rows = blocks(rows) + block_cols = blocks(cols) + vals = map(block_ids) do I + r = block_rows[I[1]] + c = block_cols[I[2]] + PSparseMatrix{V}(undef,partition(r),partition(c)) + end + return BlockPMatrix(vals,rows) +end + +# AbstractArray API + +Base.axes(a::BlockPArray) = a.axes +Base.size(a::BlockPArray) = Tuple(map(length,a.axes)) + +Base.IndexStyle(::Type{<:BlockPVector}) = IndexLinear() +Base.IndexStyle(::Type{<:BlockPMatrix}) = IndexCartesian() + +function Base.similar(a::BlockPVector,::Type{T},inds::Tuple{<:BlockPRange}) where T + vals = map(blocks(a),blocks(inds[1])) do ai,i + similar(ai,T,i) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPVector{V,T,A}},inds::Tuple{<:BlockPRange}) where {V,T,A} + rows = blocks(inds[1]) + values = map(rows) do r + return similar(A,(r,)) + end + return BlockPArray(values,inds) +end + +function Base.similar(a::BlockPMatrix,::Type{T},inds::Tuple{<:BlockPRange,<:BlockPRange}) where T + vals = map(CartesianIndices(blocksize(a))) do I + rows = inds[1].ranges[I[1]] + cols = inds[2].ranges[I[2]] + similar(a.blocks[I],T,(rows,cols)) + end + return BlockPArray(vals,inds) +end + +function Base.similar(::Type{<:BlockPMatrix{V,T,A}},inds::Tuple{<:BlockPRange,<:BlockPRange}) where {V,T,A} + rows = blocks(inds[1]) + cols = blocks(inds[2]) + values = map(CartesianIndices((length(rows),length(cols)))) do I + i,j = I[1],I[2] + return similar(A,(rows[i],cols[j])) + end + return BlockPArray(values,inds) +end + +function Base.getindex(a::BlockPArray{T,N},inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end +function Base.setindex(a::BlockPArray{T,N},v,inds::Vararg{Int,N}) where {T,N} + @error "Scalar indexing not supported" +end + +function Base.show(io::IO,k::MIME"text/plain",data::BlockPArray{T,N}) where {T,N} + v = first(blocks(data)) + s = prod(map(si->"$(si)x",blocksize(data)))[1:end-1] + map_main(partition(v)) do values + 
println(io,"$s-block BlockPArray{$T,$N}") + end +end + +function Base.zero(v::BlockPArray) + return mortar(map(zero,blocks(v))) +end + +function Base.copyto!(y::BlockPVector,x::BlockPVector) + @check blocklength(x) == blocklength(y) + for i in blockaxes(x,1) + copyto!(y[i],x[i]) + end + return y +end + +function Base.copyto!(y::BlockPMatrix,x::BlockPMatrix) + @check blocksize(x) == blocksize(y) + for i in blockaxes(x,1) + for j in blockaxes(x,2) + copyto!(y[i,j],x[i,j]) + end + end + return y +end + +function Base.fill!(a::BlockPVector,v) + map(blocks(a)) do a + fill!(a,v) + end + return a +end + +function Base.sum(a::BlockPArray) + return sum(map(sum,blocks(a))) +end + +Base.maximum(x::BlockPArray) = maximum(identity,x) +function Base.maximum(f::Function,x::BlockPArray) + maximum(map(xi->maximum(f,xi),blocks(x))) +end + +Base.minimum(x::BlockPArray) = minimum(identity,x) +function Base.minimum(f::Function,x::BlockPArray) + minimum(map(xi->minimum(f,xi),blocks(x))) +end + +function Base.:(==)(a::BlockPVector,b::BlockPVector) + A = length(a) == length(b) + B = all(map((ai,bi)->ai==bi,blocks(a),blocks(b))) + return A && B +end + +function Base.any(f::Function,x::BlockPVector) + any(map(xi->any(f,xi),blocks(x))) +end + +function Base.all(f::Function,x::BlockPVector) + all(map(xi->all(f,xi),blocks(x))) +end + +function LinearAlgebra.rmul!(a::BlockPVector,v::Number) + map(ai->rmul!(ai,v),blocks(a)) + return a +end + +# AbstractBlockArray API + +BlockArrays.blocks(a::BlockPArray) = a.blocks + +function Base.getindex(a::BlockPArray,inds::Block{1}) + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{V,T,N},inds::Block{N}) where {V,T,N} + a.blocks[inds.n...] +end +function Base.getindex(a::BlockPArray{V,T,N},inds::Vararg{Block{1},N}) where {V,T,N} + a.blocks[map(i->i.n[1],inds)...] 
+end + +function BlockArrays.mortar(blocks::Vector{<:PVector}) + rows = map(b->axes(b,1),blocks) + BlockPVector(blocks,rows) +end + +function BlockArrays.mortar(blocks::Matrix{<:PSparseMatrix}) + rows = map(b->axes(b,1),blocks[:,1]) + cols = map(b->axes(b,2),blocks[1,:]) + + function check_axes(a,r,c) + A = PartitionedArrays.matching_local_indices(axes(a,1),r) + B = PartitionedArrays.matching_local_indices(axes(a,2),c) + return A & B + end + @check all(map(I -> check_axes(blocks[I],rows[I[1]],cols[I[2]]),CartesianIndices(size(blocks)))) + + return BlockPMatrix(blocks,rows,cols) +end + +# PartitionedArrays API + +Base.wait(t::Array) = map(wait,t) +Base.fetch(t::Array) = map(fetch,t) + +function PartitionedArrays.assemble!(a::BlockPArray) + map(assemble!,blocks(a)) +end + +function PartitionedArrays.consistent!(a::BlockPArray) + map(consistent!,blocks(a)) +end + +function PartitionedArrays.partition(a::BlockPArray) + vals = map(partition,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.to_trivial_partition(a::BlockPArray) + vals = map(PartitionedArrays.to_trivial_partition,blocks(a)) + return mortar(vals) +end + +function PartitionedArrays.local_values(a::BlockPArray) + vals = map(local_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_values(a::BlockPArray) + vals = map(own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_values(a::BlockPArray) + vals = map(ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.own_ghost_values(a::BlockPMatrix) + vals = map(own_ghost_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +function PartitionedArrays.ghost_own_values(a::BlockPMatrix) + vals = map(ghost_own_values,blocks(a)) |> to_parray_of_arrays + return map(mortar,vals) +end + +# LinearAlgebra API + +function LinearAlgebra.mul!(y::BlockPVector,A::BlockPMatrix,x::BlockPVector) + z = zero(eltype(y)) + o = one(eltype(A)) + for i in blockaxes(A,2) + fill!(y[i],z) + for j in blockaxes(A,2) + mul!(y[i],A[i,j],x[j],o,o) + end + end +end + +function LinearAlgebra.dot(x::BlockPVector,y::BlockPVector) + return sum(map(dot,blocks(x),blocks(y))) +end + +function LinearAlgebra.norm(v::BlockPVector,p::Real=2) + block_norms = map(vi->norm(vi,p),blocks(v)) + return sum(block_norms.^p)^(1/p) +end + +function LinearAlgebra.fillstored!(a::BlockPMatrix,v) + map(blocks(a)) do a + LinearAlgebra.fillstored!(a,v) + end + return a +end + +# Broadcasting + +struct BlockPBroadcasted{A,B} + blocks :: A + axes :: B +end + +BlockArrays.blocks(b::BlockPBroadcasted) = b.blocks +BlockArrays.blockaxes(b::BlockPBroadcasted) = b.axes + +function Base.broadcasted(f, args::Union{BlockPArray,BlockPBroadcasted}...) + a1 = first(args) + @boundscheck @assert all(ai -> blockaxes(ai) == blockaxes(a1),args) + + blocks_in = map(blocks,args) + blocks_out = map((largs...)->Base.broadcasted(f,largs...),blocks_in...) 
+ + return BlockPBroadcasted(blocks_out,blockaxes(a1)) +end + +function Base.broadcasted(f, a::Number, b::Union{BlockPArray,BlockPBroadcasted}) + blocks_out = map(b->Base.broadcasted(f,a,b),blocks(b)) + return BlockPBroadcasted(blocks_out,blockaxes(b)) +end + +function Base.broadcasted(f, a::Union{BlockPArray,BlockPBroadcasted}, b::Number) + blocks_out = map(a->Base.broadcasted(f,a,b),blocks(a)) + return BlockPBroadcasted(blocks_out,blockaxes(a)) +end + +function Base.broadcasted(f, + a::Union{BlockPArray,BlockPBroadcasted}, + b::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}) + Base.broadcasted(f,a,Base.materialize(b)) +end + +function Base.broadcasted( + f, + a::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}}, + b::Union{BlockPArray,BlockPBroadcasted}) + Base.broadcasted(f,Base.materialize(a),b) +end + +function Base.materialize(b::BlockPBroadcasted) + blocks_out = map(Base.materialize,blocks(b)) + return mortar(blocks_out) +end + +function Base.materialize!(a::BlockPArray,b::BlockPBroadcasted) + map(Base.materialize!,blocks(a),blocks(b)) + return a +end diff --git a/src/DivConformingFESpaces.jl b/src/DivConformingFESpaces.jl index bfd5f3dd..ac31e505 100644 --- a/src/DivConformingFESpaces.jl +++ b/src/DivConformingFESpaces.jl @@ -1,5 +1,4 @@ -""" -""" + function FESpaces.FESpace(model::DistributedDiscreteModel, reffe::Tuple{RaviartThomas,Any,Any}; conformity=nothing,kwargs...) diff --git a/src/FESpaces.jl b/src/FESpaces.jl index 3953bd90..21aedf42 100644 --- a/src/FESpaces.jl +++ b/src/FESpaces.jl @@ -119,11 +119,11 @@ end function fetch_vector_ghost_values_cache(vector_partition,partition) cache = PArrays.p_vector_cache(vector_partition,partition) map(reverse,cache) -end +end function fetch_vector_ghost_values!(vector_partition,cache) assemble!((a,b)->b, vector_partition, cache) -end +end function generate_gids( cell_range::PRange, @@ -173,7 +173,6 @@ function generate_gids( cell_to_ldofs, cell_range) - # Find the global range of owned dofs first_gdof = scan(+,nodofs,type=:exclusive,init=one(eltype(nodofs))) @@ -325,18 +324,18 @@ function FESpaces.EvaluationFunction( end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector,isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals) + f::DistributedSingleFieldFESpace,x::AbstractVector,isconsistent=false) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values)) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end function _EvaluationFunction(func, - f::DistributedSingleFieldFESpace,free_values::AbstractVector, + f::DistributedSingleFieldFESpace,x::AbstractVector, dirichlet_values::AbstractArray{<:AbstractVector},isconsistent=false) - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - fields = map(func,f.spaces,local_vals,dirichlet_values) + free_values = change_ghost(x,f.gids,is_consistent=isconsistent,make_consistent=true) + fields = map(func,f.spaces,partition(free_values),dirichlet_values) metadata = DistributedFEFunctionData(free_values) DistributedCellField(fields,metadata) end @@ -463,16 +462,16 @@ end # Factories -function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;kwargs...) +function FESpaces.FESpace(model::DistributedDiscreteModel,reffe;split_own_and_ghost=false,kwargs...) 
spaces = map(local_views(model)) do m FESpace(m,reffe;kwargs...) end gids = generate_gids(model,spaces) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) +function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;split_own_and_ghost=false,kwargs...) trian = add_ghost_cells(_trian) trian_gids = generate_cell_gids(trian) spaces = map(trian.trians) do t @@ -481,23 +480,23 @@ function FESpaces.FESpace(_trian::DistributedTriangulation,reffe;kwargs...) cell_to_ldofs = map(get_cell_dof_ids,spaces) nldofs = map(num_free_dofs,spaces) gids = generate_gids(trian_gids,cell_to_ldofs,nldofs) - vector_type = _find_vector_type(spaces,gids) + vector_type = _find_vector_type(spaces,gids;split_own_and_ghost=split_own_and_ghost) DistributedSingleFieldFESpace(spaces,gids,vector_type) end -function _find_vector_type(spaces,gids) - # TODO Now the user can select the local vector type but not the global one - # new kw-arg global_vector_type ? - # we use PVector for the moment - local_vector_type=typeof(Int) - map(spaces) do space - local_vector_type=get_vector_type(space) +function _find_vector_type(spaces,gids;split_own_and_ghost=false) + local_vector_type = get_vector_type(PartitionedArrays.getany(spaces)) + Tv = eltype(local_vector_type) + T = Vector{Tv} + if split_own_and_ghost + T = OwnAndGhostVectors{T} end - vector_entries=map(i->local_vector_type(undef,length(local_to_owner(i))),partition(gids)) - - # Here we are determining the full type of a PVector by creating one auxiliary vector - # Can this be done more efficiently? - vector_type = typeof(PVector(vector_entries,partition(gids))) + if isa(gids,PRange) + vector_type = typeof(PVector{T}(undef,partition(gids))) + else # isa(gids,BlockPRange) + vector_type = typeof(BlockPVector{T}(undef,gids)) + end + return vector_type end # Assembly @@ -506,17 +505,12 @@ function FESpaces.collect_cell_matrix( trial::DistributedFESpace, test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_matrix, - local_views(trial), - local_views(test), - local_views(a)) + map(collect_cell_matrix,local_views(trial),local_views(test),local_views(a)) end function FESpaces.collect_cell_vector( test::DistributedFESpace, a::DistributedDomainContribution) - map( - collect_cell_vector,local_views(test),local_views(a)) + map(collect_cell_vector,local_views(test),local_views(a)) end function FESpaces.collect_cell_matrix_and_vector( @@ -557,8 +551,7 @@ function FESpaces.collect_cell_matrix_and_vector( test::DistributedFESpace, mat::DistributedDomainContribution, l::Number) - map( - local_views(trial),local_views(test),local_views(mat)) do u,v,m + map(local_views(trial),local_views(test),local_views(mat)) do u,v,m collect_cell_matrix_and_vector(u,v,m,l) end end @@ -569,8 +562,7 @@ function FESpaces.collect_cell_matrix_and_vector( mat::DistributedDomainContribution, l::Number, uhd) - map( - local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f + map(local_views(trial),local_views(test),local_views(mat),local_views(uhd)) do u,v,m,f collect_cell_matrix_and_vector(u,v,m,l,f) end end @@ -594,27 +586,37 @@ FESpaces.get_vector_builder(a::DistributedSparseMatrixAssembler) = a.vector_buil FESpaces.get_assembly_strategy(a::DistributedSparseMatrixAssembler) = a.strategy function 
FESpaces.symbolic_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(symbolic_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.numeric_loop_matrix!(A,a::DistributedSparseMatrixAssembler,matdata) - map(numeric_loop_matrix!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),a.assems,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) end function FESpaces.symbolic_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(symbolic_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.numeric_loop_vector!(b,a::DistributedSparseMatrixAssembler,vecdata) - map(numeric_loop_vector!,local_views(b,a.test_dofs_gids_prange),a.assems,vecdata) + rows = get_rows(a) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) end function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(symbolic_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedSparseMatrixAssembler,data) - map(numeric_loop_matrix_and_vector!,local_views(A,a.test_dofs_gids_prange,a.trial_dofs_gids_prange),local_views(b,a.test_dofs_gids_prange),a.assems,data) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) end # Parallel Assembly strategies @@ -625,16 +627,37 @@ end # When using this one, make sure that you also loop over ghost cells. # This is at your own risk. 
-function local_assembly_strategy(::FullyAssembledRows,test_space_indices,trial_space_indices) - test_space_local_to_ghost = local_to_ghost(test_space_indices) +function local_assembly_strategy(::FullyAssembledRows,rows,cols) + rows_local_to_ghost = local_to_ghost(rows) GenericAssemblyStrategy( identity, identity, - row->test_space_local_to_ghost[row]==0, + row->rows_local_to_ghost[row]==0, col->true) end # Assembler high level constructors +function FESpaces.SparseMatrixAssembler( + local_mat_type, + local_vec_type, + rows::PRange, + cols::PRange, + par_strategy=SubAssembledRows()) + + assems = map(partition(rows),partition(cols)) do rows,cols + local_strategy = local_assembly_strategy(par_strategy,rows,cols) + FESpaces.GenericSparseMatrixAssembler(SparseMatrixBuilder(local_mat_type), + ArrayBuilder(local_vec_type), + Base.OneTo(length(rows)), + Base.OneTo(length(cols)), + local_strategy) + end + + mat_builder = PSparseMatrixBuilderCOO(local_mat_type,par_strategy) + vec_builder = PVectorBuilder(local_vec_type,par_strategy) + return DistributedSparseMatrixAssembler(par_strategy,assems,mat_builder,vec_builder,rows,cols) +end + function FESpaces.SparseMatrixAssembler( local_mat_type, local_vec_type, @@ -642,25 +665,9 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = local_vec_type - T = eltype(Tv) - Tm = local_mat_type - trial_dofs_gids_partition = partition(trial.gids) - test_dofs_gids_partition = partition(test.gids) - assems = map(local_views(test),local_views(trial),test_dofs_gids_partition,trial_dofs_gids_partition) do v,u,trial_gids_partition,test_gids_partition - local_strategy = local_assembly_strategy(par_strategy,trial_gids_partition,test_gids_partition) - SparseMatrixAssembler(Tm,Tv,u,v,local_strategy) - end - matrix_builder = PSparseMatrixBuilderCOO(Tm,par_strategy) - vector_builder = PVectorBuilder(Tv,par_strategy) - test_dofs_gids_prange = get_free_dof_ids(test) - trial_dofs_gids_prange = get_free_dof_ids(trial) - DistributedSparseMatrixAssembler(par_strategy, - assems, - matrix_builder, - vector_builder, - test_dofs_gids_prange, - trial_dofs_gids_prange) + rows = get_free_dof_ids(test) + cols = get_free_dof_ids(trial) + SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) end function FESpaces.SparseMatrixAssembler( @@ -668,11 +675,8 @@ function FESpaces.SparseMatrixAssembler( test::DistributedFESpace, par_strategy=SubAssembledRows()) - Tv = typeof(Int) - map(local_views(trial)) do trial - Tv = get_vector_type(trial) - end - T = eltype(Tv) + Tv = PartitionedArrays.getany(map(get_vector_type,local_views(trial))) + T = eltype(Tv) Tm = SparseMatrixCSC{T,Int} SparseMatrixAssembler(Tm,Tv,trial,test,par_strategy) end diff --git a/src/GridapDistributed.jl b/src/GridapDistributed.jl index eca02ab6..9e985bc8 100644 --- a/src/GridapDistributed.jl +++ b/src/GridapDistributed.jl @@ -22,9 +22,11 @@ const PArrays = PartitionedArrays using SparseArrays using WriteVTK using FillArrays +using BlockArrays +using LinearAlgebra import Gridap.TensorValues: inner, outer, double_contraction, symmetric_part -import LinearAlgebra: det, tr, cross, dot, ⋅ +import LinearAlgebra: det, tr, cross, dot, ⋅, diag import Base: inv, abs, abs2, *, +, -, /, adjoint, transpose, real, imag, conj, getproperty, propertynames import Gridap.Fields: grad2curl import Gridap.ODEs.ODETools: ∂t, ∂tt @@ -38,6 +40,8 @@ export get_face_gids export local_views, get_parts export with_ghost, no_ghost +include("BlockPartitionedArrays.jl") + 
include("Algebra.jl") include("Geometry.jl") diff --git a/src/MultiField.jl b/src/MultiField.jl index 68ccb499..fa1f16e9 100644 --- a/src/MultiField.jl +++ b/src/MultiField.jl @@ -29,20 +29,21 @@ local_views(a::Vector{<:DistributedCellField}) = [ai.fields for ai in a] """ """ -struct DistributedMultiFieldFESpace{A,B,C,D} <: DistributedFESpace +struct DistributedMultiFieldFESpace{MS,A,B,C,D} <: DistributedFESpace + multi_field_style::MS field_fe_space::A part_fe_space::B gids::C vector_type::Type{D} function DistributedMultiFieldFESpace( field_fe_space::AbstractVector{<:DistributedSingleFieldFESpace}, - part_fe_space::AbstractArray{<:MultiFieldFESpace}, - gids::PRange, - vector_type::Type{D}) where D + part_fe_space::AbstractArray{<:MultiFieldFESpace{MS}}, + gids::Union{<:PRange,<:BlockPRange}, + vector_type::Type{D}) where {D,MS} A = typeof(field_fe_space) B = typeof(part_fe_space) C = typeof(gids) - new{A,B,C,D}(field_fe_space,part_fe_space,gids,vector_type) + new{MS,A,B,C,D}(MS(),field_fe_space,part_fe_space,gids,vector_type) end end @@ -62,7 +63,7 @@ function FESpaces.get_free_dof_ids(fs::DistributedMultiFieldFESpace) end function MultiField.restrict_to_field( - f::DistributedMultiFieldFESpace,free_values::PVector,field::Integer) + f::DistributedMultiFieldFESpace,free_values::AbstractVector,field::Integer) values = map(f.part_fe_space,partition(free_values)) do u,x restrict_to_field(u,x,field) end @@ -72,10 +73,8 @@ end function FESpaces.FEFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,isconsistent) - part_fe_fun = map(FEFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(FEFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -88,10 +87,8 @@ end function FESpaces.EvaluationFunction( f::DistributedMultiFieldFESpace,x::AbstractVector,isconsistent=false) - free_values = change_ghost(x,f.gids) - # This will cause also the single-field components to be consistent - local_vals = consistent_local_views(free_values,f.gids,false) - part_fe_fun = map(EvaluationFunction,f.part_fe_space,local_vals) + free_values = change_ghost(x,f.gids;is_consistent=isconsistent,make_consistent=true) + part_fe_fun = map(EvaluationFunction,f.part_fe_space,partition(free_values)) field_fe_fun = DistributedSingleFieldFEFunction[] for i in 1:num_fields(f) free_values_i = restrict_to_field(f,free_values,i) @@ -108,8 +105,7 @@ function FESpaces.interpolate(objects,fe::DistributedMultiFieldFESpace) end function FESpaces.interpolate!(objects,free_values::AbstractVector,fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -124,8 +120,7 @@ end function FESpaces.interpolate_everywhere(objects,fe::DistributedMultiFieldFESpace) free_values = zero_free_values(fe) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end 
field_fe_fun = DistributedSingleFieldFEFunction[] @@ -143,8 +138,10 @@ function FESpaces.interpolate_everywhere!( objects,free_values::AbstractVector, dirichlet_values::Vector{AbstractArray{<:AbstractVector}}, fe::DistributedMultiFieldFESpace) - local_vals = consistent_local_views(free_values,fe.gids,true) - part_fe_fun = map(local_vals,local_views(fe)) do x,f + msg = "free_values and fe have incompatible index partitions." + @check partition(axes(free_values,1)) === partition(fe.gids) msg + + part_fe_fun = map(partition(free_values),local_views(fe)) do x,f interpolate!(objects,x,f) end field_fe_fun = DistributedSingleFieldFEFunction[] @@ -160,7 +157,7 @@ end function FESpaces.interpolate_everywhere( objects::Vector{<:DistributedCellField},fe::DistributedMultiFieldFESpace) - local_objects = local_views(objects) + local_objects = map(local_views,objects) local_spaces = local_views(fe) part_fe_fun = map(local_spaces,local_objects...) do f,o... interpolate_everywhere(o,f) @@ -224,13 +221,13 @@ function FESpaces.TrialFESpace(objects,a::DistributedMultiFieldFESpace) TrialFESpace(a,objects) end -function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace,objects) +function FESpaces.TrialFESpace(a::DistributedMultiFieldFESpace{MS},objects) where MS f_dspace_test = a.field_fe_space f_dspace = map( arg -> TrialFESpace(arg[1],arg[2]), zip(f_dspace_test,objects) ) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) + p_mspace = map(s->MultiFieldFESpace(s;style=MS()),p_f_space) gids = a.gids vector_type = a.vector_type DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) @@ -239,17 +236,20 @@ end # Factory function MultiField.MultiFieldFESpace( - f_dspace::Vector{<:DistributedSingleFieldFESpace}) + f_dspace::Vector{<:DistributedSingleFieldFESpace};split_own_and_ghost=false, kwargs...) f_p_space = map(local_views,f_dspace) v(x...) = collect(x) - p_f_space = map(v,f_p_space...) - p_mspace = map(MultiFieldFESpace,p_f_space) - gids = generate_multi_field_gids(f_dspace,p_mspace) - vector_type = _find_vector_type(p_mspace,gids) + + p_f_space = map(v,f_p_space...) + p_mspace = map(f->MultiFieldFESpace(f;kwargs...),p_f_space) + style = PartitionedArrays.getany(map(MultiFieldStyle,p_mspace)) + gids = generate_multi_field_gids(style,f_dspace,p_mspace) + vector_type = _find_vector_type(p_mspace,gids;split_own_and_ghost=split_own_and_ghost) DistributedMultiFieldFESpace(f_dspace,p_mspace,gids,vector_type) end function generate_multi_field_gids( + ::MultiFieldStyle, f_dspace::Vector{<:DistributedSingleFieldFESpace}, p_mspace::AbstractArray{<:MultiFieldFESpace}) @@ -268,6 +268,19 @@ function generate_multi_field_gids( gids = generate_multi_field_gids(f_p_flid_lid,f_frange) end +function generate_multi_field_gids( + ::BlockMultiFieldStyle{NB,SB,P}, + f_dspace::Vector{<:DistributedSingleFieldFESpace}, + p_mspace::AbstractArray{<:MultiFieldFESpace}) where {NB,SB,P} + + block_ranges = MultiField.get_block_ranges(NB,SB,P) + block_gids = map(block_ranges) do range + space = (length(range) == 1) ? 
f_dspace[range[1]] : MultiFieldFESpace(f_dspace[range]) + get_free_dof_ids(space) + end + return BlockPRange(block_gids) +end + function generate_multi_field_gids( f_p_flid_lid::AbstractVector{<:AbstractArray{<:AbstractVector}}, f_frange::AbstractVector{<:PRange}) @@ -381,3 +394,92 @@ function propagate_to_ghost_multifield!( end end end + +# BlockSparseMatrixAssemblers + +const DistributedBlockSparseMatrixAssembler{NB,NV,SB,P} = + MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P,<:DistributedSparseMatrixAssembler} + +function FESpaces.SparseMatrixAssembler( + local_mat_type, + local_vec_type, + trial::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, + test::DistributedMultiFieldFESpace{<:BlockMultiFieldStyle{NB,SB,P}}, + par_strategy=SubAssembledRows()) where {NB,SB,P} + + block_idx = CartesianIndices((NB,NB)) + block_rows = blocks(test.gids) + block_cols = blocks(trial.gids) + block_assemblers = map(block_idx) do idx + rows = block_rows[idx[1]]; cols = block_cols[idx[2]] + return SparseMatrixAssembler(local_mat_type,local_vec_type,rows,cols,par_strategy) + end + + NV = length(P) + return MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}(block_assemblers) +end + +function local_views(a::MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P}) where {NB,NV,SB,P} + assems = a.block_assemblers + array = to_parray_of_arrays(map(local_views,assems)) + return map(MultiField.BlockSparseMatrixAssembler{NB,NV,SB,P},array) +end + +function local_views(a::MatrixBlock,rows,cols) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I[1]],cols[I[2]]) + end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) +end + +function local_views(a::VectorBlock,rows) + idx = CartesianIndices(axes(a)) + array = map(idx) do I + local_views(a[I],rows[I]) + end + return map(b -> ArrayBlock(b,a.touched), to_parray_of_arrays(array)) +end + +function local_views(a::ArrayBlockView,axes...) + array = local_views(a.array,axes...) 
+ map(array) do array + ArrayBlockView(array,a.block_map) + end +end + +# SparseMatrixAssembler API + +function FESpaces.symbolic_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) +end + +function FESpaces.numeric_loop_matrix!(A,a::DistributedBlockSparseMatrixAssembler,matdata) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix!,local_views(A,rows,cols),local_views(a),matdata) +end + +function FESpaces.symbolic_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) + rows = get_rows(a) + map(symbolic_loop_vector!,local_views(b,rows),local_views(a),vecdata) +end + +function FESpaces.numeric_loop_vector!(b,a::DistributedBlockSparseMatrixAssembler,vecdata) + rows = get_rows(a) + map(numeric_loop_vector!,local_views(b,rows),local_views(a),vecdata) +end + +function FESpaces.symbolic_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) + rows = get_rows(a) + cols = get_cols(a) + map(symbolic_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) +end + +function FESpaces.numeric_loop_matrix_and_vector!(A,b,a::DistributedBlockSparseMatrixAssembler,data) + rows = get_rows(a) + cols = get_cols(a) + map(numeric_loop_matrix_and_vector!,local_views(A,rows,cols),local_views(b,rows),local_views(a),data) +end diff --git a/test/BlockPartitionedArraysTests.jl b/test/BlockPartitionedArraysTests.jl new file mode 100644 index 00000000..3e810085 --- /dev/null +++ b/test/BlockPartitionedArraysTests.jl @@ -0,0 +1,112 @@ + +using Test +using Gridap +using PartitionedArrays +using GridapDistributed +using BlockArrays +using SparseArrays +using LinearAlgebra + +using GridapDistributed: BlockPArray, BlockPVector, BlockPMatrix, BlockPRange + +ranks = with_debug() do distribute + distribute(LinearIndices((2,))) +end + +indices = map(ranks) do r + if r == 1 + own_gids = [1,2,3,4,5] + ghost_gids = [6,7] + ghost_owners = [2,2] + else + own_gids = [6,7,8,9,10] + ghost_gids = [5] + ghost_owners = [1] + end + own_idx = OwnIndices(10,r,own_gids) + ghost_idx = GhostIndices(10,ghost_gids,ghost_owners) + OwnAndGhostIndices(own_idx,ghost_idx) +end + +block_range = BlockPRange([PRange(indices),PRange(indices)]) + +_v = PVector{OwnAndGhostVectors{Vector{Float64}}}(undef,indices) +v = BlockPArray([_v,_v],(block_range,)) +fill!(v,1.0) + +_m = map(CartesianIndices((2,2))) do I + i,j = I[1],I[2] + local_mats = map(ranks,indices) do r, ind + n = local_length(ind) + if i==j && r == 1 + SparseMatrixCSC(n,n,Int[1,3,5,7,9,10,11,13],Int[1,2,2,3,3,4,4,5,5,6,6,7],fill(1.0,12)) + elseif i==j && r == 2 + SparseMatrixCSC(n,n,Int[1,2,4,6,8,10,11],Int[1,1,2,2,3,3,4,4,5,6],fill(1.0,10)) + else + SparseMatrixCSC(n,n,fill(Int(1),n+1),Int[],Float64.([])) + end + end + PSparseMatrix(local_mats,indices,indices) +end; +m = BlockPArray(_m,(block_range,block_range)) + +x = similar(_v) +mul!(x,_m[1,1],_v) + +# BlockPRange + +@test blocklength(block_range) == 2 +@test blocksize(block_range) == (2,) + +# AbstractArray API + +__v = similar(v,block_range) +__m = similar(m,(block_range,block_range)) +fill!(__v,1.0) + +__v = __v .+ 1.0 +__v = __v .- 1.0 +__v = __v .* 1.0 +__v = __v ./ 1.0 + +# PartitionedArrays API + +consistent!(__v) |> wait +t = assemble!(__v) +assemble!(__m) |> wait +fetch(t); + +PartitionedArrays.to_trivial_partition(m) + +partition(v) +partition(m) +local_values(v) +own_values(v) +ghost_values(v) +own_ghost_values(m) 
+ghost_own_values(m) + +# LinearAlgebra API +fill!(v,1.0) +x = similar(v) +mul!(x,m,v) +consistent!(x) |> wait + +@test dot(v,x) ≈ 36 +norm(v) +copy!(x,v) + +LinearAlgebra.fillstored!(__m,1.0) + +__v = BlockPVector{Vector{Float64}}(undef,block_range) +#__m = BlockPMatrix{SparseMatrixCSC{Float64,Int64}}(undef,block_range,block_range) + +maximum(abs,v) +minimum(abs,v) + +# GridapDistributed API +v_parts = local_views(v) +m_parts = local_views(m) + +v_parts = local_views(v,block_range) +m_parts = local_views(m,block_range,block_range) diff --git a/test/BlockSparseMatrixAssemblersTests.jl b/test/BlockSparseMatrixAssemblersTests.jl new file mode 100644 index 00000000..9b92f185 --- /dev/null +++ b/test/BlockSparseMatrixAssemblersTests.jl @@ -0,0 +1,117 @@ +module BlockSparseMatrixAssemblersTests + +using Test, LinearAlgebra, BlockArrays, SparseArrays + +using Gridap +using Gridap.FESpaces, Gridap.ReferenceFEs, Gridap.MultiField + +using GridapDistributed +using PartitionedArrays +using GridapDistributed: BlockPVector, BlockPMatrix + +function is_same_vector(x::BlockPVector,y::PVector,Ub,U) + y_fespace = GridapDistributed.change_ghost(y,U.gids) + x_fespace = GridapDistributed.change_ghost(x,Ub.gids) + + res = map(1:num_fields(Ub)) do i + xi = restrict_to_field(Ub,x_fespace,i) + yi = restrict_to_field(U,y_fespace,i) + xi ≈ yi + end + return all(res) +end + +function is_same_matrix(Ab::BlockPMatrix,A::PSparseMatrix,Xb,X) + yb = mortar(map(Aii->pfill(0.0,partition(axes(Aii,1))),diag(blocks(Ab)))); + xb = mortar(map(Aii->pfill(1.0,partition(axes(Aii,2))),diag(blocks(Ab)))); + mul!(yb,Ab,xb) + + y = pfill(0.0,partition(axes(A,1))) + x = pfill(1.0,partition(axes(A,2))) + mul!(y,A,x) + + return is_same_vector(yb,y,Xb,X) +end + +function _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + biform, liform = weakform + + # Normal assembly + Y = MultiFieldFESpace(fill(V,n_spaces)) + X = MultiFieldFESpace(fill(U,n_spaces)) + + u = get_trial_fe_basis(X) + v = get_fe_basis(Y) + + data = collect_cell_matrix_and_vector(X,Y,biform(u,v),liform(v)) + matdata = collect_cell_matrix(X,Y,biform(u,v)) + vecdata = collect_cell_vector(Y,liform(v)) + + assem = SparseMatrixAssembler(X,Y,FullyAssembledRows()) + A1 = assemble_matrix(assem,matdata) + b1 = assemble_vector(assem,vecdata) + A2,b2 = assemble_matrix_and_vector(assem,data); + + # Block Assembly + Yb = MultiFieldFESpace(fill(V,n_spaces);style=mfs) + Xb = MultiFieldFESpace(fill(U,n_spaces);style=mfs) + + ub = get_trial_fe_basis(Xb) + vb = get_fe_basis(Yb) + + bdata = collect_cell_matrix_and_vector(Xb,Yb,biform(ub,vb),liform(vb)) + bmatdata = collect_cell_matrix(Xb,Yb,biform(ub,vb)) + bvecdata = collect_cell_vector(Yb,liform(vb)) + + assem_blocks = SparseMatrixAssembler(Xb,Yb,FullyAssembledRows()) + A1_blocks = assemble_matrix(assem_blocks,bmatdata); + b1_blocks = assemble_vector(assem_blocks,bvecdata); + @test is_same_vector(b1_blocks,b1,Yb,Y) + @test is_same_matrix(A1_blocks,A1,Xb,X) + + assemble_matrix!(A1_blocks,assem_blocks,bmatdata); + assemble_vector!(b1_blocks,assem_blocks,bvecdata); + @test is_same_vector(b1_blocks,b1,Yb,Y) + @test is_same_matrix(A1_blocks,A1,Xb,X) + + A2_blocks, b2_blocks = assemble_matrix_and_vector(assem_blocks,bdata) + @test is_same_vector(b2_blocks,b2,Yb,Y) + @test is_same_matrix(A2_blocks,A2,Xb,X) + + assemble_matrix_and_vector!(A2_blocks,b2_blocks,assem_blocks,bdata) + @test is_same_vector(b2_blocks,b2,Yb,Y) + @test is_same_matrix(A2_blocks,A2,Xb,X) + + op = AffineFEOperator(biform,liform,X,Y) + block_op = AffineFEOperator(biform,liform,Xb,Yb) + 
@test is_same_vector(get_vector(block_op),get_vector(op),Yb,Y) + @test is_same_matrix(get_matrix(block_op),get_matrix(op),Xb,X) +end + +############################################################################################ + +function main(distribute,parts) + ranks = distribute(LinearIndices((prod(parts),))) + + model = CartesianDiscreteModel(ranks,parts,(0,1,0,1),(8,8)) + Ω = Triangulation(model) + + sol(x) = sum(x) + reffe = LagrangianRefFE(Float64,QUAD,1) + V = FESpace(Ω, reffe; dirichlet_tags="boundary") + U = TrialFESpace(sol,V) + + dΩ = Measure(Ω, 2) + biform2((u1,u2),(v1,v2)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2)*dΩ + liform2((v1,v2)) = ∫(v1 + v2)*dΩ + biform3((u1,u2,u3),(v1,v2,v3)) = ∫(∇(u1)⋅∇(v1) + u2⋅v2 + u1⋅v2 + u3⋅v2 + u2⋅v3)*dΩ + liform3((v1,v2,v3)) = ∫(v1 + v2 + v3)*dΩ + + for (n_spaces,weakform) in zip([2,3],[(biform2,liform2),(biform3,liform3)]) + for mfs in [BlockMultiFieldStyle(),BlockMultiFieldStyle(2,(1,n_spaces-1))] + _main(n_spaces,mfs,weakform,Ω,dΩ,U,V) + end + end +end + +end \ No newline at end of file diff --git a/test/FESpacesTests.jl b/test/FESpacesTests.jl index 7b0860ab..bf1e78e6 100644 --- a/test/FESpacesTests.jl +++ b/test/FESpacesTests.jl @@ -91,11 +91,11 @@ function main(distribute,parts,das) V = TestFESpace(model,reffe,dirichlet_tags="boundary") U = TrialFESpace(u,V) V2 = FESpace(Ω,reffe) - @test get_vector_type(V) <: PVector - @test get_vector_type(U) <: PVector - @test get_vector_type(V2) <: PVector + @test get_vector_type(V) <: PVector{<:Vector} + @test get_vector_type(U) <: PVector{<:Vector} + @test get_vector_type(V2) <: PVector{<:Vector} - free_values_partition=map(partition(V.gids)) do indices + free_values_partition = map(partition(V.gids)) do indices ones(Float64,local_length(indices)) end @@ -163,6 +163,23 @@ function main(distribute,parts,das) cont = ∫( abs2(u0h) )dΩ @test sqrt(sum(cont)) < 1.0e-14 + # OwnAndGhostVector partitions + V3 = FESpace(model,reffe,dirichlet_tags="boundary",split_own_and_ghost=true) + U3 = TrialFESpace(u,V3) + @test get_vector_type(V3) <: PVector{<:OwnAndGhostVectors} + + free_values = zero_free_values(U3) + dirichlet_values = get_dirichlet_dof_values(U3) + uh = interpolate_everywhere(u,U3) + _uh = interpolate_everywhere(uh,U3) + __uh = interpolate_everywhere!(_uh,free_values,dirichlet_values,U3) + + uh = interpolate(u,U3) + dofs = get_fe_dof_basis(U3) + cell_vals = dofs(uh) + gather_free_values!(free_values,U3,cell_vals) + gather_free_and_dirichlet_values!(free_values,dirichlet_values,U3,cell_vals) + uh = FEFunction(U3,free_values,dirichlet_values) # I need to use the square [0,2]² in the sequel so that # when integrating over the interior facets, the entries diff --git a/test/MultiFieldTests.jl b/test/MultiFieldTests.jl index b5a4f534..cb53e322 100644 --- a/test/MultiFieldTests.jl +++ b/test/MultiFieldTests.jl @@ -2,11 +2,17 @@ module MultiFieldTests using Gridap using Gridap.FESpaces +using Gridap.MultiField using GridapDistributed using PartitionedArrays using Test -function main(distribute, parts) +function l2_error(u1,u2,dΩ) + eu = u1 - u2 + sqrt(sum(∫( eu⋅eu )dΩ)) +end + +function main(distribute, parts, mfs) ranks = distribute(LinearIndices((prod(parts),))) output = mkpath(joinpath(@__DIR__,"output")) @@ -16,6 +22,7 @@ function main(distribute, parts) Ω = Triangulation(model) k = 2 + dΩ = Measure(Ω,2*k) reffe_u = ReferenceFE(lagrangian,VectorValue{2,Float64},k) reffe_p = ReferenceFE(lagrangian,Float64,k-1,space=:P) @@ -29,34 +36,43 @@ function main(distribute, parts) U = TrialFESpace(V,u) P = TrialFESpace(Q,p) 
- VxQ = MultiFieldFESpace([V,Q]) - UxP = MultiFieldFESpace([U,P]) # This generates again the global numbering + VxQ = MultiFieldFESpace([V,Q];style=mfs) + UxP = MultiFieldFESpace([U,P];style=mfs) # This generates again the global numbering UxP = TrialFESpace([u,p],VxQ) # This reuses the one computed @test length(UxP) == 2 uh, ph = interpolate([u,p],UxP) - eu = u - uh - ep = p - ph - - dΩ = Measure(Ω,2*k) - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 a((u,p),(v,q)) = ∫( ∇(v)⊙∇(u) - q*(∇⋅u) - (∇⋅v)*p )*dΩ l((v,q)) = ∫( v⋅f - q*g )*dΩ op = AffineFEOperator(a,l,UxP,VxQ) - solver = LinearFESolver(BackslashSolver()) - uh, ph = solve(solver,op) - - eu = u - uh - ep = p - ph - - writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) - - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 - @test sqrt(sum(∫( eu⋅eu )dΩ)) < 1.0e-9 + if !isa(mfs,BlockMultiFieldStyle) # BlockMultiFieldStyle does not support BackslashSolver + solver = LinearFESolver(BackslashSolver()) + uh, ph = solve(solver,op) + @test l2_error(u,uh,dΩ) < 1.0e-9 + @test l2_error(p,ph,dΩ) < 1.0e-9 + + writevtk(Ω,"Ω",nsubcells=10,cellfields=["uh"=>uh,"ph"=>ph]) + end + + A = get_matrix(op) + xh = interpolate([u,p],UxP) + x = GridapDistributed.change_ghost(get_free_dof_values(xh),axes(A,2)) + uh1, ph1 = FESpaces.EvaluationFunction(UxP,x) + uh2, ph2 = FEFunction(UxP,x) + + @test l2_error(u,uh1,dΩ) < 1.0e-9 + @test l2_error(p,ph1,dΩ) < 1.0e-9 + @test l2_error(u,uh2,dΩ) < 1.0e-9 + @test l2_error(p,ph2,dΩ) < 1.0e-9 +end +function main(distribute, parts) + main(distribute, parts, ConsecutiveMultiFieldStyle()) + main(distribute, parts, BlockMultiFieldStyle()) end end # module diff --git a/test/StokesHdivDGTests.jl b/test/StokesHdivDGTests.jl index f3f19c78..f9d77494 100644 --- a/test/StokesHdivDGTests.jl +++ b/test/StokesHdivDGTests.jl @@ -34,84 +34,84 @@ function stokes_solution_2D(μ::Real) end function main(distribute,parts) - ranks = distribute(LinearIndices((prod(parts),))) - - μ = 1.0 - sol = stokes_solution_2D(μ) - u_ref = sol.u - f_ref = sol.f - σ_ref = sol.σ - - D = 2 - n = 4 - domain = Tuple(repeat([0,1],D)) - partition = (n,n) - model = CartesianDiscreteModel(ranks,parts,domain,partition) - - labels = get_face_labeling(model) - add_tag_from_tags!(labels,"dirichlet",[5,6,7]) - add_tag_from_tags!(labels,"neumann",[8,]) - - ############################################################################################ - order = 1 - reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) - V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") - U = TrialFESpace(V,u_ref) - - reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) - Q = TestFESpace(model,reffeₚ,conformity=:L2) - P = TrialFESpace(Q) - - Y = MultiFieldFESpace([V, Q]) - X = MultiFieldFESpace([U, P]) - - qdegree = 2*order+1 - Ω = Triangulation(model) - dΩ = Measure(Ω,qdegree) - - Γ = BoundaryTriangulation(model) - dΓ = Measure(Γ,qdegree) - n_Γ = get_normal_vector(Γ) - - Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) - dΓ_D = Measure(Γ_D,qdegree) - n_Γ_D = get_normal_vector(Γ_D) - - Γ_N = BoundaryTriangulation(model;tags="neumann") - dΓ_N = Measure(Γ_N,qdegree) - n_Γ_N = get_normal_vector(Γ_N) - - Λ = SkeletonTriangulation(model) - dΛ = Measure(Λ,qdegree) - n_Λ = get_normal_vector(Λ) - - h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) - h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) - h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) - - 
β_U = 50.0 - Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - - ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - - ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - - ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - - ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D - rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N - - penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D - penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D - - a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) - l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) - - op = AffineFEOperator(a,l,X,Y) - xh = solve(op) - - uh, ph = xh - err_u = l2_error(Ω,uh,sol.u) - err_p = l2_error(Ω,ph,sol.p) - tol = 1.0e-12 - @test err_u < tol - @test err_p < tol + ranks = distribute(LinearIndices((prod(parts),))) + + μ = 1.0 + sol = stokes_solution_2D(μ) + u_ref = sol.u + f_ref = sol.f + σ_ref = sol.σ + + D = 2 + n = 4 + domain = Tuple(repeat([0,1],D)) + partition = (n,n) + model = CartesianDiscreteModel(ranks,parts,domain,partition) + + labels = get_face_labeling(model) + add_tag_from_tags!(labels,"dirichlet",[5,6,7]) + add_tag_from_tags!(labels,"neumann",[8,]) + + ############################################################################################ + order = 1 + reffeᵤ = ReferenceFE(raviart_thomas,Float64,order) + V = TestFESpace(model,reffeᵤ,conformity=:HDiv,dirichlet_tags="dirichlet") + U = TrialFESpace(V,u_ref) + + reffeₚ = ReferenceFE(lagrangian,Float64,order;space=:P) + Q = TestFESpace(model,reffeₚ,conformity=:L2) + P = TrialFESpace(Q) + + Y = MultiFieldFESpace([V, Q]) + X = MultiFieldFESpace([U, P]) + + qdegree = 2*order+1 + Ω = Triangulation(model) + dΩ = Measure(Ω,qdegree) + + Γ = BoundaryTriangulation(model) + dΓ = Measure(Γ,qdegree) + n_Γ = get_normal_vector(Γ) + + Γ_D = BoundaryTriangulation(model;tags=["dirichlet"]) + dΓ_D = Measure(Γ_D,qdegree) + n_Γ_D = get_normal_vector(Γ_D) + + Γ_N = BoundaryTriangulation(model;tags="neumann") + dΓ_N = Measure(Γ_N,qdegree) + n_Γ_N = get_normal_vector(Γ_N) + + Λ = SkeletonTriangulation(model) + dΛ = Measure(Λ,qdegree) + n_Λ = get_normal_vector(Λ) + + h_e = CellField(map(get_array,local_views(∫(1)dΩ)),Ω) + h_e_Λ = CellField(map(get_array,local_views(∫(1)dΛ)),Λ) + h_e_Γ_D = CellField(map(get_array,local_views(∫(1)dΓ_D)),Γ_D) + + β_U = 50.0 + Δ_dg(u,v) = ∫(∇(v)⊙∇(u))dΩ - + ∫(jump(v⊗n_Λ)⊙(mean(∇(u))))dΛ - + ∫(mean(∇(v))⊙jump(u⊗n_Λ))dΛ - + ∫(v⋅(∇(u)⋅n_Γ_D))dΓ_D - + ∫((∇(v)⋅n_Γ_D)⋅u)dΓ_D + rhs((v,q)) = ∫((f_ref⋅v))*dΩ - ∫((∇(v)⋅n_Γ_D)⋅u_ref)dΓ_D + ∫((n_Γ_N⋅σ_ref)⋅v)*dΓ_N + + penalty(u,v) = ∫(jump(v⊗n_Λ)⊙((β_U/h_e_Λ*jump(u⊗n_Λ))))dΛ + ∫(v⋅(β_U/h_e_Γ_D*u))dΓ_D + penalty_rhs((v,q)) = ∫(v⋅(β_U/h_e_Γ_D*u_ref))dΓ_D + + a((u,p),(v,q)) = Δ_dg(u,v) + ∫(-(∇⋅v)*p - q*(∇⋅u))dΩ + penalty(u,v) + l((v,q)) = rhs((v,q)) - ∫(q*(∇⋅u_ref))dΩ + penalty_rhs((v,q)) + + op = AffineFEOperator(a,l,X,Y) + xh = solve(op) + + uh, ph = xh + err_u = l2_error(Ω,uh,sol.u) + err_p = l2_error(Ω,ph,sol.p) + tol = 1.0e-12 + @test err_u < tol + @test err_p < tol end end # module diff --git a/test/TestApp/.gitignore b/test/TestApp/.gitignore new file mode 100644 index 00000000..ba39cc53 --- /dev/null +++ b/test/TestApp/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/test/TestApp/Project.toml b/test/TestApp/Project.toml index 9754ac89..31c314de 100644 --- a/test/TestApp/Project.toml +++ b/test/TestApp/Project.toml @@ -3,6 +3,7 @@ uuid = "3ba29202-0f57-4e69-8cbd-5c57d4c4860a" version = "0.1.0" [deps] +BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e" Gridap = "56d4f2e9-7ea1-5844-9cf6-b9c51ca7ce8e" GridapDistributed = 
"f9701e48-63b3-45aa-9a63-9bc6c271f355" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/TestApp/src/TestApp.jl b/test/TestApp/src/TestApp.jl index f6c40936..2e5aed87 100644 --- a/test/TestApp/src/TestApp.jl +++ b/test/TestApp/src/TestApp.jl @@ -12,4 +12,5 @@ module TestApp include("../../HeatEquationTests.jl") include("../../StokesOpenBoundaryTests.jl") include("../../AdaptivityTests.jl") + include("../../BlockSparseMatrixAssemblersTests.jl") end \ No newline at end of file diff --git a/test/mpi/runtests_np4_body.jl b/test/mpi/runtests_np4_body.jl index 0e8fbb6a..70f9c01c 100644 --- a/test/mpi/runtests_np4_body.jl +++ b/test/mpi/runtests_np4_body.jl @@ -45,5 +45,8 @@ function all_tests(distribute,parts) PArrays.toc!(t,"Adaptivity") end + TestApp.BlockSparseMatrixAssemblersTests.main(distribute,parts) + PArrays.toc!(t,"BlockSparseMatrixAssemblers") + display(t) end diff --git a/test/sequential/BlockSparseMatrixAssemblersTests.jl b/test/sequential/BlockSparseMatrixAssemblersTests.jl new file mode 100644 index 00000000..9c60126d --- /dev/null +++ b/test/sequential/BlockSparseMatrixAssemblersTests.jl @@ -0,0 +1,17 @@ +module BlockSparseMatrixAssemblersTestsSeq +using PartitionedArrays +include("../BlockSparseMatrixAssemblersTests.jl") + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,2)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(2,1)) +end + +with_debug() do distribute + BlockSparseMatrixAssemblersTests.main(distribute,(1,2)) +end + +end # module \ No newline at end of file diff --git a/test/sequential/runtests.jl b/test/sequential/runtests.jl index 12f82145..57da04ef 100644 --- a/test/sequential/runtests.jl +++ b/test/sequential/runtests.jl @@ -34,5 +34,8 @@ end @time @testset "StokesHdivDGTests.jl" begin include("StokesHdivDGTests.jl") end +@time @testset "BlockSparseMatrixAssemblers" begin + include("BlockSparseMatrixAssemblersTests.jl") +end end # module