Skip to content

Commit

Permalink
squashfs: cache the last block decompressed in the file for 10x speedup
Browse files Browse the repository at this point in the history
Before this change, we would spend an awful lot of CPU time reading
and decompressing the same block, only to read a small part of it.

This change caches the last block used in the file handle, and for
the block sizes typically used in reads this makes a 10x difference
in throughput.
  • Loading branch information
ncw authored and deitch committed Dec 22, 2023
1 parent 21cc751 commit ef160a9
Showing 1 changed file with 19 additions and 7 deletions.
26 changes: 19 additions & 7 deletions filesystem/squashfs/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
// include all of the data
type File struct {
*extendedFile
isReadWrite bool
isAppend bool
offset int64
filesystem *FileSystem
isReadWrite bool
isAppend bool
offset int64
filesystem *FileSystem
blockLocation int64 // the position of the last block decompressed
block []byte // the actual last block decompressed
}

// Read reads up to len(b) bytes from the File.
Expand Down Expand Up @@ -99,9 +101,19 @@ func (fl *File) Read(b []byte) (int, error) {
if int64(block.size) > fs.blocksize {
return read, fmt.Errorf("unexpected block.size=%d > fs.blocksize=%d", block.size, fs.blocksize)
}
input, err := fs.readBlock(location, block.compressed, block.size)
if err != nil {
return read, fmt.Errorf("error reading data block %d from squashfs: %v", i, err)
var input []byte
if fl.blockLocation == location && fl.block != nil {
// Read last block from cache
input = fl.block
} else {
var err error
input, err = fs.readBlock(location, block.compressed, block.size)
if err != nil {
return read, fmt.Errorf("error reading data block %d from squashfs: %v", i, err)
}
// Cache the last block
fl.blockLocation = location
fl.block = input
}
outputBlock(input)
}
Expand Down

0 comments on commit ef160a9

Please sign in to comment.