diff --git a/src/p_sparse_matrix.jl b/src/p_sparse_matrix.jl
index ce79c9bd..02708d21 100644
--- a/src/p_sparse_matrix.jl
+++ b/src/p_sparse_matrix.jl
@@ -1668,7 +1668,7 @@ function psparse_consistent_impl(
         n_ghost_cols = ghost_length(cols_co)
         TA = typeof(A.blocks.ghost_own)
         own_own = A.blocks.own_own
-        own_ghost = compresscoo(TA,I2,J2,V2,n_own_rows,n_ghost_cols)
+        own_ghost = compresscoo(TA,I2,J2,V2,n_own_rows,n_ghost_cols) # TODO this can be improved
         ghost_own = compresscoo(TA,I_rcv_own,J_rcv_own,V_rcv_own,n_ghost_rows,n_own_cols)
         ghost_ghost = compresscoo(TA,I_rcv_ghost,J_rcv_ghost,V_rcv_ghost,n_ghost_rows,n_ghost_cols)
         K_own = precompute_nzindex(ghost_own,I_rcv_own,J_rcv_own)
@@ -1743,6 +1743,10 @@ function psparse_consistent_impl!(B,A,::Type{<:AbstractSplitMatrix},cache)
         setcoofast!(B.blocks.ghost_ghost,V_rcv_ghost,K_ghost)
         B
     end
+    map(own_own_values(B),own_own_values(A)) do b,a
+        msg = "consistent!(B,A,cache) can only be called if B was obtained as B,cache = consistent(A)|>fetch"
+        @assert a === b msg
+    end
     map(setup_snd,partition(A),cache)
     parts_snd = map(i->i.parts_snd,cache)
     parts_rcv = map(i->i.parts_rcv,cache)
@@ -1750,6 +1754,11 @@ function psparse_consistent_impl!(B,A,::Type{<:AbstractSplitMatrix},cache)
     V_rcv = map(i->i.V_rcv,cache)
     graph = ExchangeGraph(parts_snd,parts_rcv)
     t = exchange!(V_rcv,V_snd,graph)
+    map(own_ghost_values(B),own_ghost_values(A)) do b,a
+        if nonzeros(b) !== nonzeros(a)
+            copy!(nonzeros(b),nonzeros(a))
+        end
+    end
     @async begin
         wait(t)
         map(setup_rcv,partition(B),cache)
diff --git a/test/p_sparse_matrix_tests.jl b/test/p_sparse_matrix_tests.jl
index 72060e16..3d71f16a 100644
--- a/test/p_sparse_matrix_tests.jl
+++ b/test/p_sparse_matrix_tests.jl
@@ -330,32 +330,49 @@ function p_sparse_matrix_tests(distribute)
     nodes_per_dir = (5,5)
     parts_per_dir = (2,2)
     A = PartitionedArrays.laplace_matrix(nodes_per_dir,parts_per_dir,parts)
-
-    B = A*A
     A_seq = centralize(A)
-    @test centralize(B) ≈ A_seq*A_seq
-
-    B = spmm(A,A)
-    @test centralize(B) ≈ A_seq*A_seq
-    B,cacheB = spmm(A,A;reuse=true)
-    spmm!(B,A,A,cacheB)
-    @test centralize(B) ≈ A_seq*A_seq
-
-    B = transpose(A)*A
-    @test centralize(B) ≈ transpose(A_seq)*A_seq
-
-    B = spmtm(A,A)
-    B,cacheB = spmtm(A,A;reuse=true)
-    @test centralize(B) ≈ transpose(A_seq)*A_seq
-    spmtm!(B,A,A,cacheB)
-    @test centralize(B) ≈ transpose(A_seq)*A_seq
-
-    C = rap(transpose(A),A,A)
-    @test centralize(C) ≈ transpose(A_seq)*A_seq*A_seq
-    C,cacheC = rap(transpose(A),A,A;reuse=true)
-    @test centralize(C) ≈ transpose(A_seq)*A_seq*A_seq
-    rap!(C,transpose(A),A,A,cacheC)
-    @test centralize(C) ≈ transpose(A_seq)*A_seq*A_seq
+    Z = 2*A
+    Z_seq = centralize(Z)
+
+    B = Z*A
+    @test centralize(B) ≈ Z_seq*A_seq
+
+    B = spmm(Z,A)
+    @test centralize(B) ≈ Z_seq*A_seq
+    B,cacheB = spmm(Z,A;reuse=true)
+    map(partition(A)) do A
+        nonzeros(A.blocks.own_own) .*= 4
+        nonzeros(A.blocks.own_ghost) .*= 4
+    end
+    A_seq = centralize(A)
+    spmm!(B,Z,A,cacheB)
+    @test centralize(B) ≈ Z_seq*(A_seq)
+
+    B = transpose(Z)*A
+    @test centralize(B) ≈ transpose(Z_seq)*A_seq
+
+    B = spmtm(Z,A)
+    B,cacheB = spmtm(Z,A;reuse=true)
+    @test centralize(B) ≈ transpose(Z_seq)*A_seq
+    map(partition(A)) do A
+        nonzeros(A.blocks.own_own) .*= 4
+        nonzeros(A.blocks.own_ghost) .*= 4
+    end
+    A_seq = centralize(A)
+    spmtm!(B,Z,A,cacheB)
+    @test centralize(B) ≈ transpose(Z_seq)*A_seq
+
+    C = rap(transpose(A),Z,A)
+    @test centralize(C) ≈ transpose(A_seq)*Z_seq*A_seq
+    C,cacheC = rap(transpose(A),Z,A;reuse=true)
+    @test centralize(C) ≈ transpose(A_seq)*Z_seq*A_seq
+    map(partition(A)) do A
+        nonzeros(A.blocks.own_own) .*= 4
+        nonzeros(A.blocks.own_ghost) .*= 4
+    end
+    A_seq = centralize(A)
+    rap!(C,transpose(A),Z,A,cacheC)
+    @test centralize(C) ≈ transpose(A_seq)*Z_seq*A_seq
 
     r = pzeros(partition(axes(A,2)))
     x = pones(partition(axes(A,1)))