diff --git a/Project.toml b/Project.toml
index 543c9a5d..5f898cc4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "FinEtools"
 uuid = "91bb5406-6c9a-523d-811d-0644c4229550"
 authors = ["Petr Krysl <pkrysl@ucsd.edu>"]
-version = "6.0.9"
+version = "6.0.10"
 
 [deps]
 ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
diff --git a/src/AssemblyModule.jl b/src/AssemblyModule.jl
index 2a63e486..866c3669 100644
--- a/src/AssemblyModule.jl
+++ b/src/AssemblyModule.jl
@@ -188,11 +188,17 @@ function assemble!(
     p = self.buffer_pointer
     @assert p + ncolumns * nrows <= self.buffer_length + 1
     @assert size(mat) == (nrows, ncolumns)
-    @inbounds for j in 1:ncolumns, i in 1:nrows
-        self.matbuffer[p] = mat[i, j] # serialized matrix
-        self.rowbuffer[p] = dofnums_row[i]
-        self.colbuffer[p] = dofnums_col[j]
-        p = p + 1
+    @inbounds for j in 1:ncolumns
+        if 0 < dofnums_col[j] <= self.ndofs_col
+            for i in 1:nrows
+                if 0 < dofnums_row[i] <= self.ndofs_row
+                    self.matbuffer[p] = mat[i, j] # serialized matrix
+                    self.rowbuffer[p] = dofnums_row[i]
+                    self.colbuffer[p] = dofnums_col[j]
+                    p = p + 1
+                end
+            end
+        end
     end
     self.buffer_pointer = p
     return self
@@ -204,28 +210,20 @@ end
 Make a sparse matrix.
 """
 function makematrix!(self::SysmatAssemblerSparse)
+    @assert length(self.rowbuffer) >= self.buffer_pointer - 1
+    @assert length(self.colbuffer) >= self.buffer_pointer - 1
+    # We have the option of retaining the assembled results, but not
+    # constructing the sparse matrix.
     if self.nomatrixresult
         # No actual sparse matrix is returned. The entire result of the assembly
-        # is preserved in the assembler buffers.
+        # is preserved in the assembler buffers. The ends of the buffers are
+        # filled with reasonable values, just in case someone wants to look at
+        # them.
+        self.rowbuffer[self.buffer_pointer:end] .= 1
+        self.colbuffer[self.buffer_pointer:end] .= 1
+        self.matbuffer[self.buffer_pointer:end] .= 0.0
         return spzeros(self.ndofs_row, self.ndofs_col)
     end
-    # Otherwise, we will accumulate the sparse matrix from the records in the
-    # buffers.
-    @assert length(self.rowbuffer) >= self.buffer_pointer - 1
-    @assert length(self.colbuffer) >= self.buffer_pointer - 1
-    # Here we will go through the rows and columns, and whenever the row or
-    # the column refer to indexes outside of the limits of the matrix, the
-    # corresponding value will be set to 0 and assembled to row and column 1.
-    @inbounds for j in 1:self.buffer_pointer-1
-        if (self.rowbuffer[j] > self.ndofs_row) || (self.rowbuffer[j] <= 0)
-            self.rowbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
-        if (self.colbuffer[j] > self.ndofs_col) || (self.colbuffer[j] <= 0)
-            self.colbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
-    end
     # The sparse matrix is constructed and returned. The  buffers used for
     # the assembly are cleared.
     S = sparse(
@@ -379,11 +377,17 @@ function assemble!(
     p = self.buffer_pointer
     @assert p + ncolumns * nrows <= self.buffer_length + 1
     @assert size(mat) == (nrows, ncolumns)
-    @inbounds for j in 1:ncolumns, i in j:nrows
-        self.matbuffer[p] = mat[i, j] # serialized matrix
-        self.rowbuffer[p] = dofnums[i]
-        self.colbuffer[p] = dofnums[j]
-        p = p + 1
+    @inbounds for j in 1:ncolumns
+        if 0 < dofnums[j] <= self.ndofs
+            for i in j:nrows
+                if 0 < dofnums[i] <= self.ndofs
+                    self.matbuffer[p] = mat[i, j] # serialized matrix
+                    self.rowbuffer[p] = dofnums[i]
+                    self.colbuffer[p] = dofnums[j]
+                    p = p + 1
+                end
+            end
+        end
     end
     self.buffer_pointer = p
     return self
@@ -395,28 +399,22 @@ end
 Make a sparse symmetric square matrix.
 """
 function makematrix!(self::SysmatAssemblerSparseSymm)
-    if self.nomatrixresult
-        # No actual sparse matrix is returned. The entire result of the assembly
-        # is preserved in the assembler buffers.
-        return spzeros(self.ndofs, self.ndofs)
-    end
-    # Otherwise, we will accumulate the sparse matrix from the records in the
-    # buffers.
     @assert length(self.rowbuffer) >= self.buffer_pointer - 1
     @assert length(self.colbuffer) >= self.buffer_pointer - 1
-    # Here we will go through the rows and columns, and whenever the row or
-    # the column refer to indexes outside of the limits of the matrix, the
-    # corresponding value will be set to 0 and assembled to row and column 1.
-    @inbounds for j in 1:self.buffer_pointer-1
-        if (self.rowbuffer[j] > self.ndofs) || (self.rowbuffer[j] <= 0)
-            self.rowbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
-        if (self.colbuffer[j] > self.ndofs) || (self.colbuffer[j] <= 0)
-            self.colbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
+    # We have the option of retaining the assembled results, but not
+    # constructing the sparse matrix.
+    if self.nomatrixresult
+        # No actual sparse matrix is returned. The entire result of the assembly
+        # is preserved in the assembler buffers. The ends of the buffers are
+        # filled with reasonable values, just in case someone wants to look at
+        # them.
+        self.rowbuffer[self.buffer_pointer:end] .= 1
+        self.colbuffer[self.buffer_pointer:end] .= 1
+        self.matbuffer[self.buffer_pointer:end] .= 0.0
+        return spzeros(self.ndofs_row, self.ndofs_col)
     end
+    # The sparse matrix is constructed and returned. The  buffers used for
+    # the assembly are cleared.
     S = sparse(
         self.rowbuffer[1:self.buffer_pointer-1],
         self.colbuffer[1:self.buffer_pointer-1],
@@ -547,10 +545,12 @@ function assemble!(
     @assert p + ncolumns <= self.buffer_length + 1
     @assert size(mat) == (nrows, ncolumns)
     @inbounds for j in 1:ncolumns
-        self.matbuffer[p] = mat[j, j] # serialized matrix
-        self.rowbuffer[p] = dofnums[j]
-        self.colbuffer[p] = dofnums[j]
-        p = p + 1
+        if 0 < dofnums[j] <= self.ndofs
+            self.matbuffer[p] = mat[j, j] # serialized matrix
+            self.rowbuffer[p] = dofnums[j]
+            self.colbuffer[p] = dofnums[j]
+            p = p + 1
+        end
     end
     self.buffer_pointer = p
     return self
@@ -562,28 +562,22 @@ end
 Make a sparse symmetric square diagonal matrix.
 """
 function makematrix!(self::SysmatAssemblerSparseDiag)
-    if self.nomatrixresult
-            # No actual sparse matrix is returned. The entire result of the assembly
-            # is preserved in the assembler buffers.
-        return spzeros(self.ndofs, self.ndofs)
-    end
-    # Otherwise, we will accumulate the sparse matrix from the records in the
-    # buffers.
     @assert length(self.rowbuffer) >= self.buffer_pointer - 1
     @assert length(self.colbuffer) >= self.buffer_pointer - 1
-    # Here we will go through the rows and columns, and whenever the row or
-    # the column refer to indexes outside of the limits of the matrix, the
-    # corresponding value will be set to 0 and assembled to row and column 1.
-    @inbounds for j in 1:self.buffer_pointer-1
-        if (self.rowbuffer[j] > self.ndofs) || (self.rowbuffer[j] <= 0)
-            self.rowbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
-        if (self.colbuffer[j] > self.ndofs) || (self.colbuffer[j] <= 0)
-            self.colbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
+    # We have the option of retaining the assembled results, but not
+    # constructing the sparse matrix.
+    if self.nomatrixresult
+        # No actual sparse matrix is returned. The entire result of the assembly
+        # is preserved in the assembler buffers. The ends of the buffers are
+        # filled with reasonable values, just in case someone wants to look at
+        # them.
+        self.rowbuffer[self.buffer_pointer:end] .= 1
+        self.colbuffer[self.buffer_pointer:end] .= 1
+        self.matbuffer[self.buffer_pointer:end] .= 0.0
+        return spzeros(self.ndofs_row, self.ndofs_col)
     end
+    # The sparse matrix is constructed and returned. The  buffers used for
+    # the assembly are cleared.
     S = sparse(
         self.rowbuffer[1:self.buffer_pointer-1],
         self.colbuffer[1:self.buffer_pointer-1],
@@ -840,16 +834,22 @@ function assemble!(
     # Now comes the lumping procedure
     em2 = sum(sum(mat, dims = 1)) # total mass times the number of space dimensions
     dem2 = zero(eltype(mat)) # total mass on the diagonal times the number of space dimensions
-    for i in 1:nrows
+    @inbounds for i in 1:nrows
         dem2 += mat[i, i]
     end
     ffactor = em2 / dem2 # total-element-mass compensation factor
-    @inbounds for j in 1:ncolumns,  i in j:nrows
-        if i == j # only the diagonal elements are assembled
-            self.matbuffer[p] = mat[i, j] * ffactor # serialized matrix
-            self.rowbuffer[p] = dofnums[i]
-            self.colbuffer[p] = dofnums[j]
-            p = p + 1
+    @inbounds for j in 1:ncolumns
+        if 0 < dofnums[j] <= self.ndofs
+            for i in j:nrows
+                if i == j # only the diagonal elements are assembled
+                    if 0 < dofnums[i] <= self.ndofs
+                        self.matbuffer[p] = mat[i, j] * ffactor # serialized matrix
+                        self.rowbuffer[p] = dofnums[i]
+                        self.colbuffer[p] = dofnums[j]
+                        p = p + 1
+                    end
+                end
+            end
         end
     end
     self.buffer_pointer = p
@@ -877,28 +877,22 @@ end
 Make a sparse HRZ-lumped **symmetric square**  matrix.
 """
 function makematrix!(self::SysmatAssemblerSparseHRZLumpingSymm)
-    if self.nomatrixresult
-            # No actual sparse matrix is returned. The entire result of the assembly
-            # is preserved in the assembler buffers.
-        return spzeros(self.ndofs, self.ndofs)
-    end
-    # Otherwise, we will accumulate the sparse matrix from the records in the
-    # buffers.
     @assert length(self.rowbuffer) >= self.buffer_pointer - 1
     @assert length(self.colbuffer) >= self.buffer_pointer - 1
-    # Here we will go through the rows and columns, and whenever the row or
-    # the column refer to indexes outside of the limits of the matrix, the
-    # corresponding value will be set to 0 and assembled to row and column 1.
-    @inbounds for j in 1:self.buffer_pointer-1
-        if (self.rowbuffer[j] > self.ndofs) || (self.rowbuffer[j] <= 0)
-            self.rowbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
-        if (self.colbuffer[j] > self.ndofs) || (self.colbuffer[j] <= 0)
-            self.colbuffer[j] = 1
-            self.matbuffer[j] = 0.0
-        end
+    # We have the option of retaining the assembled results, but not
+    # constructing the sparse matrix.
+    if self.nomatrixresult
+        # No actual sparse matrix is returned. The entire result of the assembly
+        # is preserved in the assembler buffers. The ends of the buffers are
+        # filled with reasonable values, just in case someone wants to look at
+        # them.
+        self.rowbuffer[self.buffer_pointer:end] .= 1
+        self.colbuffer[self.buffer_pointer:end] .= 1
+        self.matbuffer[self.buffer_pointer:end] .= 0.0
+        return spzeros(self.ndofs_row, self.ndofs_col)
     end
+    # The sparse matrix is constructed and returned. The  buffers used for
+    # the assembly are cleared.
     S = sparse(
         self.rowbuffer[1:self.buffer_pointer-1],
         self.colbuffer[1:self.buffer_pointer-1],
diff --git a/test/test_basics.jl b/test/test_basics.jl
index 5236e930..ad2db669 100644
--- a/test/test_basics.jl
+++ b/test/test_basics.jl
@@ -1513,6 +1513,7 @@ end
 using .mwithnodes4
 mwithnodes4.test()
 
+
 module mutil_th001
 
 using Test
@@ -1550,54 +1551,35 @@ function _test()
     (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m3, r3, c3),
-    (m4, r4, c4),
-    (m1, r1, c1),
-    (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m4, r4, c4),
-    (m3, r3, c3),
-    (m3, r3, c3),
-    (m1, r1, c1),
-    (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m3, r3, c3),
-    (m4, r4, c4),
-    (m1, r1, c1),
-    (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m4, r4, c4),
-    (m3, r3, c3),
-    (m3, r3, c3),
-    (m1, r1, c1),
-    (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m3, r3, c3),
-    (m4, r4, c4),
-    (m1, r1, c1),
-    (m1, r1, c1),
     (m2, r2, c2),
     (m2, r2, c2),
-    (m4, r4, c4),
     (m3, r3, c3),
     (m3, r3, c3),
-    (m1, r1, c1),
-    (m1, r1, c1),
-    (m2, r2, c2),
-    (m2, r2, c2),
     (m3, r3, c3),
+    (m3, r3, c3),
+    (m3, r3, c3),
+    (m3, r3, c3),
+    (m4, r4, c4),
+    (m4, r4, c4),
+    (m4, r4, c4),
+    (m4, r4, c4),
+    (m4, r4, c4),
     (m4, r4, c4),
     (m1, r1, c1),
     (m1, r1, c1),
+    (m1, r1, c1),
+    (m1, r1, c1),
+    (m1, r1, c1),
+    (m1, r1, c1),
     (m2, r2, c2),
-    (m2, r2, c2),
-    (m4, r4, c4),
-    (m3, r3, c3),
-    (m3, r3, c3),
     ]
     elem_mat_nrows = 5
     elem_mat_ncols = 5
@@ -1621,17 +1603,19 @@ function _test()
     ntasks = Base.Threads.nthreads()
     istart = 1; iend = 0;
     for ch in chunks(1:length(assembly_line), ntasks)
-        # @show ch[2], ch[1]
+        # @show ch
         buffer_length = 5 * 5 * length(ch[1])
         iend = iend + buffer_length
         matbuffer = view(a.matbuffer, istart:iend)
         rowbuffer = view(a.rowbuffer, istart:iend)
         colbuffer = view(a.colbuffer, istart:iend)
+        # @show length(colbuffer), istart, iend, buffer_length
         buffer_pointer = 1
-        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, false)
+        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, true)
         for i in ch[1]
             assemble!(a1, assembly_line[i]...)
         end
+        makematrix!(a1)
         istart = iend + 1
     end
     a.buffer_pointer = iend
@@ -1655,11 +1639,12 @@ function _test()
             rowbuffer .= 1
             colbuffer .= 1
             buffer_pointer = 1
-            a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, false)
+            a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, true)
             Threads.@spawn let r =  $ch[1]
                 for i in r
                     assemble!(a1, assembly_line[i]...)
                 end
+                makematrix!(a1)
             end
             istart = iend + 1
         end
@@ -1759,14 +1744,12 @@ function _test()
         matbuffer = view(a.matbuffer, istart:iend)
         rowbuffer = view(a.rowbuffer, istart:iend)
         colbuffer = view(a.colbuffer, istart:iend)
-        matbuffer .= 0.0
-        rowbuffer .= 1
-        colbuffer .= 1
         buffer_pointer = 1
-        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, false)
+        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, true)
         for i in ch[1]
             assemble!(a1, assembly_line[i]...)
         end
+        makematrix!(a1)
         # @show "done $(ch[2])"
         istart = iend + 1
     end
@@ -1792,14 +1775,12 @@ function _test()
             colbuffer = view(a.colbuffer, istart:iend)
             buffer_pointer = 1
             Threads.@spawn let r =  $ch[1]
-                matbuffer .= 0.0
-                rowbuffer .= 1
-                colbuffer .= 1
-                a1 = SysmatAssemblerSparse(buffer_length, $matbuffer, $rowbuffer, $colbuffer, $buffer_pointer, ndofs_row, ndofs_col, false)
+                a1 = SysmatAssemblerSparse(buffer_length, $matbuffer, $rowbuffer, $colbuffer, $buffer_pointer, ndofs_row, ndofs_col, true)
                 # @show ch[2], r
                 for i in r
                     assemble!(a1, assembly_line[i]...)
                 end
+                makematrix!(a1)
             end
             # @show "done $(ch[2])"
             istart = iend + 1
@@ -1905,7 +1886,7 @@ function _test()
         rowbuffer .= 1
         colbuffer .= 1
         buffer_pointer = 1
-        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, false)
+        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, true)
         for i in ch[1]
             assemble!(a1, assembly_line[i]...)
         end
@@ -1934,7 +1915,7 @@ function _test()
         matbuffer .= 0.0
         rowbuffer .= 1
         colbuffer .= 1
-        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, false)
+        a1 = SysmatAssemblerSparse(buffer_length, matbuffer, rowbuffer, colbuffer, buffer_pointer, ndofs_row, ndofs_col, true)
     end
 
     # Parallel execution