From 39d5a56278ad467af914695f0b6dafa1f793ab59 Mon Sep 17 00:00:00 2001 From: Avi Deitcher Date: Mon, 24 Sep 2018 21:34:23 +0300 Subject: [PATCH] ext4 Signed-off-by: Avi Deitcher --- disk/disk.go | 3 + filesystem/ext4/bitmaps.go | 104 +++ filesystem/ext4/blockgroup.go | 53 ++ filesystem/ext4/checksum.go | 48 + filesystem/ext4/common_test.go | 713 ++++++++++++++ filesystem/ext4/crc/crc16.go | 44 + filesystem/ext4/crc/crc32.go | 74 ++ filesystem/ext4/directory.go | 211 +++++ filesystem/ext4/directory_test.go | 24 + filesystem/ext4/directoryentry.go | 176 ++++ filesystem/ext4/directoryentry_test.go | 24 + filesystem/ext4/dirhash.go | 157 ++++ filesystem/ext4/dirhash_test.go | 1 + filesystem/ext4/ext4.go | 1084 ++++++++++++++++++++++ filesystem/ext4/ext4.md | 290 ++++++ filesystem/ext4/ext4_test.go | 148 +++ filesystem/ext4/extent.go | 320 +++++++ filesystem/ext4/features.go | 451 +++++++++ filesystem/ext4/file.go | 118 +++ filesystem/ext4/fileinfo.go | 48 + filesystem/ext4/groupdescriptors.go | 310 +++++++ filesystem/ext4/groupdescriptors_test.go | 101 ++ filesystem/ext4/inode.go | 588 ++++++++++++ filesystem/ext4/journaldevice_other.go | 12 + filesystem/ext4/journaldevice_shared.go | 40 + filesystem/ext4/journaldevice_windows.go | 11 + filesystem/ext4/md4/md4.go | 73 ++ filesystem/ext4/md4/md4_test.go | 151 +++ filesystem/ext4/miscflags.go | 34 + filesystem/ext4/mountoptions.go | 182 ++++ filesystem/ext4/superblock.go | 763 +++++++++++++++ filesystem/ext4/superblock_test.go | 65 ++ filesystem/ext4/testdata/.gitignore | 1 + filesystem/ext4/testdata/README.md | 31 + filesystem/ext4/testdata/buildimg.sh | 44 + filesystem/ext4/util.go | 106 +++ filesystem/ext4/util_test.go | 200 ++++ filesystem/filesystem.go | 2 + go.mod | 5 +- go.sum | 7 + util/uniqify.go | 13 + 41 files changed, 6829 insertions(+), 1 deletion(-) create mode 100644 filesystem/ext4/bitmaps.go create mode 100644 filesystem/ext4/blockgroup.go create mode 100644 filesystem/ext4/checksum.go create mode 100644 
filesystem/ext4/common_test.go create mode 100644 filesystem/ext4/crc/crc16.go create mode 100644 filesystem/ext4/crc/crc32.go create mode 100644 filesystem/ext4/directory.go create mode 100644 filesystem/ext4/directory_test.go create mode 100644 filesystem/ext4/directoryentry.go create mode 100644 filesystem/ext4/directoryentry_test.go create mode 100644 filesystem/ext4/dirhash.go create mode 100644 filesystem/ext4/dirhash_test.go create mode 100644 filesystem/ext4/ext4.go create mode 100644 filesystem/ext4/ext4.md create mode 100644 filesystem/ext4/ext4_test.go create mode 100644 filesystem/ext4/extent.go create mode 100644 filesystem/ext4/features.go create mode 100644 filesystem/ext4/file.go create mode 100644 filesystem/ext4/fileinfo.go create mode 100644 filesystem/ext4/groupdescriptors.go create mode 100644 filesystem/ext4/groupdescriptors_test.go create mode 100644 filesystem/ext4/inode.go create mode 100644 filesystem/ext4/journaldevice_other.go create mode 100644 filesystem/ext4/journaldevice_shared.go create mode 100644 filesystem/ext4/journaldevice_windows.go create mode 100644 filesystem/ext4/md4/md4.go create mode 100644 filesystem/ext4/md4/md4_test.go create mode 100644 filesystem/ext4/miscflags.go create mode 100644 filesystem/ext4/mountoptions.go create mode 100644 filesystem/ext4/superblock.go create mode 100644 filesystem/ext4/superblock_test.go create mode 100644 filesystem/ext4/testdata/.gitignore create mode 100644 filesystem/ext4/testdata/README.md create mode 100755 filesystem/ext4/testdata/buildimg.sh create mode 100644 filesystem/ext4/util.go create mode 100644 filesystem/ext4/util_test.go create mode 100644 util/uniqify.go diff --git a/disk/disk.go b/disk/disk.go index ad16e068..011a8671 100644 --- a/disk/disk.go +++ b/disk/disk.go @@ -13,6 +13,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/diskfs/go-diskfs/filesystem" + "github.com/diskfs/go-diskfs/filesystem/ext4" "github.com/diskfs/go-diskfs/filesystem/fat32" 
// bitmap is an in-memory bitmap, one bit per block or inode.
// Bit j of byte i represents index 8*i+j (LSB-first within each byte),
// matching the mapping used by findBitForIndex and ext4 on-disk bitmaps.
type bitmap struct {
	bits []byte
}

// bitmapFromBytes creates a bitmap struct from a copy of the given bytes.
func bitmapFromBytes(b []byte) *bitmap {
	// just copy them over
	bits := make([]byte, len(b))
	copy(bits, b)
	bm := bitmap{
		bits: bits,
	}

	return &bm
}

// toBytes returns raw bytes ready to be written to disk
func (bm *bitmap) toBytes() []byte {
	b := make([]byte, len(bm.bits))
	copy(b, bm.bits)

	return b
}

// checkFree reports whether the bit at location is set.
// NOTE(review): in ext4 bitmaps a set bit conventionally means "in use";
// despite its name this returns true when the bit is 1 — confirm callers
// expect that polarity.
func (bm *bitmap) checkFree(location int) (bool, error) {
	byteNumber, bitNumber := findBitForIndex(location)
	// was: byteNumber > len(bm.bits) — off by one; byteNumber == len passed
	// the check and then indexed out of range below
	if byteNumber >= len(bm.bits) {
		return false, fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	mask := byte(0x1) << bitNumber
	return bm.bits[byteNumber]&mask == mask, nil
}

// free clears the bit at location.
func (bm *bitmap) free(location int) error {
	byteNumber, bitNumber := findBitForIndex(location)
	if byteNumber >= len(bm.bits) {
		return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	bm.bits[byteNumber] &^= byte(0x1) << bitNumber
	return nil
}

// use sets the bit at location.
func (bm *bitmap) use(location int) error {
	byteNumber, bitNumber := findBitForIndex(location)
	if byteNumber >= len(bm.bits) {
		return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	bm.bits[byteNumber] |= byte(0x1) << bitNumber
	return nil
}

// findFirstFree returns the index of the first 0 bit, or -1 if every bit is set.
func (bm *bitmap) findFirstFree() int {
	for i, b := range bm.bits {
		// if all used, continue to next byte
		if b == 0xff {
			continue
		}
		// not all used, so find the first bit set to 0
		for j := uint8(0); j < 8; j++ {
			if b&(byte(0x1)<<j) == 0 {
				// was: 8*i + (8 - int(j)) — mirrored the bit index, which is
				// inconsistent with findBitForIndex (index/8, index%8)
				return 8*i + int(j)
			}
		}
	}
	return -1
}

// findFirstUsed returns the index of the first 1 bit, or -1 if every bit is clear.
//
//nolint:revive // params are unused as of yet, but will be used in the future
func (bm *bitmap) findFirstUsed() int {
	for i, b := range bm.bits {
		// if all free, continue to next byte
		if b == 0x00 {
			continue
		}
		// not all free, so find the first bit set to 1
		for j := uint8(0); j < 8; j++ {
			if b&(byte(0x1)<<j) != 0 {
				// was: 8*i + (8 - int(j)) — same mirrored-index bug as findFirstFree
				return 8*i + int(j)
			}
		}
	}
	return -1
}

// findBitForIndex maps a bitmap index to its byte offset and bit-within-byte.
func findBitForIndex(index int) (byteNumber int, bitNumber uint8) {
	return index / 8, uint8(index % 8)
}

// blockGroup is a structure holding the data about a single block group
//
//nolint:unused // will be used in the future, not yet
type blockGroup struct {
	inodeBitmap    *bitmap
	blockBitmap    *bitmap
	blockSize      int
	number         int
	inodeTableSize int
	firstDataBlock int
}

// blockGroupFromBytes create a blockGroup struct from bytes
// it does not load the inode table or data blocks into memory, rather holding pointers to where they are
//
//nolint:unused // will be used in the future, not yet
func blockGroupFromBytes(b []byte, blockSize, groupNumber int) (*blockGroup, error) {
	expectedSize := 2 * blockSize
	actualSize := len(b)
	if actualSize != expectedSize {
		return nil, fmt.Errorf("expected to be passed %d bytes for 2 blocks of size %d, instead received %d", expectedSize, blockSize, actualSize)
	}
	inodeBitmap := bitmapFromBytes(b[0:blockSize])
	blockBitmap := bitmapFromBytes(b[blockSize : 2*blockSize])

	bg := blockGroup{
		inodeBitmap: inodeBitmap,
		blockBitmap: blockBitmap,
		number:      groupNumber,
		blockSize:   blockSize,
	}
	return &bg, nil
}

// toBytes returns both bitmaps concatenated, ready to be written to disk.
//
//nolint:unused // will be used in the future, not yet
func (bg *blockGroup) toBytes() ([]byte, error) {
	// was: make([]byte, 2*bg.blockSize) followed by appends, which produced a
	// 4*blockSize result with a zeroed prefix; allocate capacity only
	b := make([]byte, 0, 2*bg.blockSize)
	b = append(b, bg.inodeBitmap.toBytes()...)
	b = append(b, bg.blockBitmap.toBytes()...)

	return b, nil
}
inodeGeneration) + crcResult = crc.CRC32c(crcResult, genBytes) + return func(b []byte) uint32 { + checksum := crc.CRC32c(crcResult, b) + return checksum + } +} + +// directoryChecksumAppender returns a function that implements checksumAppender for a directory entries block +// original calculations can be seen for e2fsprogs https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/csum.c#n301 +// and in the linux tree https://github.com/torvalds/linux/blob/master/fs/ext4/namei.c#L376-L384 +func directoryChecksumAppender(seed, inodeNumber, inodeGeneration uint32) checksumAppender { + fn := directoryChecksummer(seed, inodeNumber, inodeGeneration) + return func(b []byte) []byte { + checksum := fn(b) + checksumBytes := make([]byte, 12) + checksumBytes[4] = 12 + checksumBytes[7] = 0xde + binary.LittleEndian.PutUint32(checksumBytes[8:12], checksum) + b = append(b, checksumBytes...) + return b + } +} + +// nullDirectoryChecksummer does not change anything +func nullDirectoryChecksummer(b []byte) []byte { + return b +} diff --git a/filesystem/ext4/common_test.go b/filesystem/ext4/common_test.go new file mode 100644 index 00000000..33c30a25 --- /dev/null +++ b/filesystem/ext4/common_test.go @@ -0,0 +1,713 @@ +package ext4 + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "math" + "os" + "os/exec" + "regexp" + "strconv" + "strings" + "testing" + "time" + + uuid "github.com/satori/go.uuid" +) + +const ( + imgFile = "testdata/dist/ext4.img" + fooDirFile = "testdata/dist/foo_dir.txt" + testGDTFile = "testdata/dist/gdt.bin" + rootDirFile = "testdata/dist/root_dir.txt" + testRootDirFile = "testdata/dist/root_directory.bin" + testSuperblockFile = "testdata/dist/superblock.bin" + testFilesystemStats = "testdata/dist/stats.txt" +) + +// TestMain sets up the test environment and runs the tests +func TestMain(m *testing.M) { + // Check and generate artifacts if necessary + if _, err := os.Stat(imgFile); os.IsNotExist(err) { + // Run the genartifacts.sh script + 
cmd := exec.Command("sh", "buildimg.sh") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Dir = "testdata" + + // Execute the command + if err := cmd.Run(); err != nil { + println("error generating test artifacts for ext4", err) + os.Exit(1) + } + } + + // Run the tests + code := m.Run() + + // Exit with the appropriate code + os.Exit(code) +} + +type testGDTLineHandler struct { + re *regexp.Regexp + handler func(*groupDescriptor, []string) error +} + +var ( + gdtRELines = []testGDTLineHandler{ + {regexp.MustCompile(`^Group (\d+): \(Blocks (\d+)-(\d+)\) csum 0x([0-9a-f]+) \[(.*)]`), func(gd *groupDescriptor, matches []string) error { + // group number + number, err := strconv.ParseUint(matches[1], 10, 16) + if err != nil { + return fmt.Errorf("failed to parse group number: %v", err) + } + gd.number = uint16(number) + // parse the flags + flags := strings.Split(matches[5], ",") + for _, flag := range flags { + switch strings.TrimSpace(flag) { + case "ITABLE_ZEROED": + gd.flags.inodeTableZeroed = true + case "INODE_UNINIT": + gd.flags.inodesUninitialized = true + case "BLOCK_UNINIT": + gd.flags.blockBitmapUninitialized = true + default: + return fmt.Errorf("unknown flag %s", flag) + } + } + return nil + }}, + {regexp.MustCompile(`Block bitmap at (\d+) \(.*\), csum (0x[0-9A-Fa-f]+)$`), func(gd *groupDescriptor, matches []string) error { + // block bitmap + blockBitmap, err := strconv.ParseUint(matches[1], 10, 64) + if err != nil { + return fmt.Errorf("failed to parse block bitmap: %v", err) + } + gd.blockBitmapLocation = blockBitmap + // block bitmap checksum + blockBitmapChecksum, err := strconv.ParseUint(matches[2], 0, 32) + if err != nil { + return fmt.Errorf("failed to parse block bitmap checksum: %v", err) + } + gd.blockBitmapChecksum = uint32(blockBitmapChecksum) + return nil + + }}, + {regexp.MustCompile(`Inode bitmap at (\d+) \(.*\), csum (0x[0-9a-fA-F]+)$`), func(gd *groupDescriptor, matches []string) error { + // inode bitmap + inodeBitmap, err := 
strconv.ParseUint(matches[1], 10, 64) + if err != nil { + return fmt.Errorf("failed to parse inode bitmap: %v", err) + } + gd.inodeBitmapLocation = inodeBitmap + // inode bitmap checksum + inodeBitmapChecksum, err := strconv.ParseUint(matches[2], 0, 32) + if err != nil { + return fmt.Errorf("failed to parse inode bitmap checksum: %v", err) + } + gd.inodeBitmapChecksum = uint32(inodeBitmapChecksum) + return nil + + }}, + {regexp.MustCompile(`Inode table at (\d+)-(\d+) (.*)$`), func(gd *groupDescriptor, matches []string) error { + // inode table location + inodeTableStart, err := strconv.ParseUint(matches[1], 10, 64) + if err != nil { + return fmt.Errorf("failed to parse inode table start: %v", err) + } + gd.inodeTableLocation = inodeTableStart + return nil + }}, + {regexp.MustCompile(`(\d+) free blocks, (\d+) free inodes, (\d+) directories(, (\d+) unused inodes)?`), func(gd *groupDescriptor, matches []string) error { + // free blocks + freeBlocks, err := strconv.ParseUint(matches[1], 10, 32) + if err != nil { + return fmt.Errorf("failed to parse free blocks: %v", err) + } + gd.freeBlocks = uint32(freeBlocks) + // free inodes + freeInodes, err := strconv.ParseUint(matches[2], 10, 32) + if err != nil { + return fmt.Errorf("failed to parse free inodes: %v", err) + } + gd.freeInodes = uint32(freeInodes) + // directories + directories, err := strconv.ParseUint(matches[3], 10, 32) + if err != nil { + return fmt.Errorf("failed to parse directories: %v", err) + } + gd.usedDirectories = uint32(directories) + // unused inodes + if len(matches) > 5 && matches[5] != "" { + unusedInodes, err := strconv.ParseUint(matches[5], 10, 32) + if err != nil { + return fmt.Errorf("failed to parse unused inodes: %v", err) + } + gd.unusedInodes = uint32(unusedInodes) + } + return nil + }}, + } +) + +type testSuperblockFunc func(*superblock, string) error + +var testSuperblockFuncs = map[string]testSuperblockFunc{ + "Filesystem state": func(sb *superblock, value string) error { + switch value 
{ + case "clean": + sb.filesystemState = fsStateCleanlyUnmounted + default: + sb.filesystemState = fsStateErrors + } + return nil + }, + "Errors behavior": func(sb *superblock, value string) error { + switch value { + case "Continue": + sb.errorBehaviour = errorsContinue + default: + sb.errorBehaviour = errorsPanic + } + return nil + }, + "Last mounted on": func(sb *superblock, value string) error { + sb.lastMountedDirectory = value + return nil + }, + "Filesystem UUID": func(sb *superblock, value string) error { + uuid, err := uuid.FromString(value) + if err != nil { + return err + } + sb.uuid = &uuid + return nil + }, + "Filesystem magic number": func(_ *superblock, value string) error { + if value != "0xEF53" { + return fmt.Errorf("invalid magic number %s", value) + } + return nil + }, + "Filesystem volume name": func(sb *superblock, value string) error { + if value != "" { + sb.volumeLabel = value + } + return nil + }, + "Filesystem revision #": func(sb *superblock, value string) error { + // just need the first part, as it sometimes looks like: 1 (dynamic) + value = strings.Split(value, " ")[0] + rev, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("filesystem revision: %w", err) + } + sb.revisionLevel = uint32(rev) + return nil + }, + "Inode count": func(sb *superblock, value string) error { + inodeCount, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Inode count: %w", err) + } + sb.inodeCount = uint32(inodeCount) + return nil + }, + "Block count": func(sb *superblock, value string) error { + blockCount, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return fmt.Errorf("Block count: %w", err) + } + sb.blockCount = blockCount + return nil + }, + "Reserved block count": func(sb *superblock, value string) error { + reservedBlocks, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return fmt.Errorf("Reserved block count: %w", err) + } + sb.reservedBlocks = reservedBlocks + return 
nil + }, + "Overhead clusters": func(sb *superblock, value string) error { + overheadBlocks, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Overhead clusters: %w", err) + } + sb.overheadBlocks = uint32(overheadBlocks) + return nil + }, + "Free blocks": func(sb *superblock, value string) error { + freeBlocks, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return fmt.Errorf("Free blocks: %w", err) + } + sb.freeBlocks = freeBlocks + return nil + }, + "Free inodes": func(sb *superblock, value string) error { + freeInodes, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Free inodes: %w", err) + } + sb.freeInodes = uint32(freeInodes) + return nil + }, + "First block": func(sb *superblock, value string) error { + firstBlock, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("First block: %w", err) + } + sb.firstDataBlock = uint32(firstBlock) + return nil + }, + "Block size": func(sb *superblock, value string) error { + blockSize, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Block size: %w", err) + } + sb.blockSize = uint32(blockSize) + return nil + }, + "Group descriptor size": func(sb *superblock, value string) error { + groupDescriptorSize, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Group descriptor size: %w", err) + } + sb.groupDescriptorSize = uint16(groupDescriptorSize) + return nil + }, + "Reserved GDT blocks": func(sb *superblock, value string) error { + reservedGDTBlocks, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Reserved GDT blocks: %w", err) + } + sb.reservedGDTBlocks = uint16(reservedGDTBlocks) + sb.features.reservedGDTBlocksForExpansion = true + return nil + }, + "Blocks per group": func(sb *superblock, value string) error { + blocksPerGroup, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Blocks per group: %w", err) + } + 
sb.blocksPerGroup = uint32(blocksPerGroup) + return nil + }, + "Inodes per group": func(sb *superblock, value string) error { + inodesPerGroup, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Inodes per group: %w", err) + } + sb.inodesPerGroup = uint32(inodesPerGroup) + return nil + }, + "Flex block group size": func(sb *superblock, value string) error { + flexBlockGroupSize, err := strconv.ParseUint(value, 10, 64) + if err != nil { + return fmt.Errorf("Flex block group size: %w", err) + } + sb.logGroupsPerFlex = flexBlockGroupSize + return nil + }, + "Filesystem created": func(sb *superblock, value string) error { + createTime, err := time.Parse("Mon Jan 2 15:04:05 2006", value) + if err != nil { + return err + } + sb.mkfsTime = createTime.UTC() + return nil + }, + "Last mount time": func(sb *superblock, value string) error { + mountTime, err := time.Parse("Mon Jan 2 15:04:05 2006", value) + if err != nil { + return err + } + sb.mountTime = mountTime.UTC() + return nil + }, + "Last write time": func(sb *superblock, value string) error { + writeTime, err := time.Parse("Mon Jan 2 15:04:05 2006", value) + if err != nil { + return err + } + sb.writeTime = writeTime.UTC() + return nil + }, + "Mount count": func(sb *superblock, value string) error { + mountCount, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Mount count: %w", err) + } + sb.mountCount = uint16(mountCount) + return nil + }, + "Maximum mount count": func(sb *superblock, value string) error { + maxMountCount, err := strconv.ParseInt(value, 10, 16) + if err != nil { + return fmt.Errorf("Maximum mount count: %w", err) + } + sb.mountsToFsck = uint16(maxMountCount) + return nil + }, + "Last checked": func(sb *superblock, value string) error { + lastChecked, err := time.Parse("Mon Jan 2 15:04:05 2006", value) + if err != nil { + return err + } + sb.lastCheck = lastChecked.UTC() + return nil + }, + "First inode": func(sb *superblock, value string) 
error { + firstInode, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("First inode: %w", err) + } + sb.firstNonReservedInode = uint32(firstInode) + return nil + }, + "Inode size": func(sb *superblock, value string) error { + inodeSize, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Inode size: %w", err) + } + sb.inodeSize = uint16(inodeSize) + return nil + }, + "Required extra isize": func(sb *superblock, value string) error { + inodeMinBytes, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Required extra isize: %w", err) + } + sb.inodeMinBytes = uint16(inodeMinBytes) + return nil + }, + "Desired extra isize": func(sb *superblock, value string) error { + inodeReserveBytes, err := strconv.ParseUint(value, 10, 16) + if err != nil { + return fmt.Errorf("Desired extra isize: %w", err) + } + sb.inodeReserveBytes = uint16(inodeReserveBytes) + return nil + }, + "Journal inode": func(sb *superblock, value string) error { + journalInode, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Journal inode: %w", err) + } + sb.journalInode = uint32(journalInode) + return nil + }, + "Default directory hash": func(sb *superblock, value string) error { + switch value { + case "half_md4": + sb.hashVersion = hashHalfMD4 + case "tea": + sb.hashVersion = hashTea + case "legacy": + sb.hashVersion = hashLegacy + default: + return fmt.Errorf("unknown directory hash %s", value) + } + + return nil + }, + "Directory Hash Seed": func(sb *superblock, value string) error { + u, err := uuid.FromString(value) + if err != nil { + return err + } + hashTreeSeedBytes := u.Bytes() + hashTreeSeed := make([]uint32, 4) + for i := 0; i < 4; i++ { + hashTreeSeed[i] = binary.LittleEndian.Uint32(hashTreeSeedBytes[i*4 : (i+1)*4]) + } + sb.hashTreeSeed = hashTreeSeed + + return nil + }, + "Checksum type": func(sb *superblock, value string) error { + switch value { + case "crc32c": + 
sb.checksumType = checkSumTypeCRC32c + default: + return fmt.Errorf("unknown checksum type %s", value) + } + return nil + }, + "Checksum seed": func(sb *superblock, value string) error { + checksumSeed, err := strconv.ParseUint(value, 0, 32) + if err != nil { + return fmt.Errorf("Checksum seed: %w", err) + } + sb.checksumSeed = uint32(checksumSeed) + return nil + }, + "Orphan file inode": func(sb *superblock, value string) error { + orphanInode, err := strconv.ParseUint(value, 10, 32) + if err != nil { + return fmt.Errorf("Orphan file inode: %w", err) + } + sb.orphanedInodeInodeNumber = uint32(orphanInode) + return nil + }, + "Journal backup": func(sb *superblock, value string) error { + switch value { + case "inode blocks": + // unfortunately, debugfs does not give this to us, so we read it manually + /* + Journal backup inodes: 0x0001f30a, 0x00000004, 0x00000000, 0x00000000, 0x00001000, 0x0000c001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + (these are in little-endian already; I converted by running: + `dd if=superblock.bin bs=1 skip=$((0x10c)) count=$((15 * 4)) | hexdump -e '15/4 "0x%08x, " "\n"'` + ) + they are saved as testdata/dist/journalinodes.txt + */ + + sb.journalBackup = &journalBackup{ + iBlocks: [15]uint32{0x0001f30a, 0x00000004, 0x00000000, 0x00000000, 0x00001000, 0x0000c001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, + iSize: uint64(4096 * KB), + } + default: + return fmt.Errorf("unknown journal backup %s", value) + } + return nil + }, + "Reserved blocks uid": func(sb *superblock, value string) error { + parts := strings.Split(value, " ") + if len(parts) < 2 { + return fmt.Errorf("invalid uid string %s", value) + } + uid, err := strconv.ParseUint(parts[0], 10, 16) + if err != nil { + return fmt.Errorf("Reserved blocks uid: %w", err) + } + sb.reservedBlocksDefaultUID = uint16(uid) + return nil + }, + "Reserved blocks 
gid": func(sb *superblock, value string) error { + parts := strings.Split(value, " ") + if len(parts) < 2 { + return fmt.Errorf("invalid gid string %s", value) + } + gid, err := strconv.ParseUint(parts[0], 10, 16) + if err != nil { + return fmt.Errorf("Reserved blocks gid: %w", err) + } + sb.reservedBlocksDefaultGID = uint16(gid) + return nil + }, + "Lifetime writes": func(sb *superblock, value string) error { + parts := strings.Split(value, " ") + if len(parts) < 2 { + return fmt.Errorf("invalid lifetime writes string %s", value) + } + writes, err := strconv.ParseUint(parts[0], 10, 64) + if err != nil { + return fmt.Errorf("Lifetime writes: %w", err) + } + // if this is in MB, we need to convert to KB + if parts[1] == "MB" { + writes *= uint64(KB) + } + sb.totalKBWritten = writes + return nil + }, + "Filesystem flags": func(sb *superblock, value string) error { + flags := strings.Split(value, " ") + for _, flag := range flags { + switch flag { + case "unsigned_directory_hash": + sb.miscFlags.unsignedDirectoryHash = true + case "signed_directory_hash": + sb.miscFlags.signedDirectoryHash = true + case "test_code": + sb.miscFlags.developmentTest = true + default: + return fmt.Errorf("unknown flag %s", flag) + } + } + return nil + }, + "Default mount options": func(sb *superblock, value string) error { + options := strings.Split(value, " ") + for _, option := range options { + switch option { + case "user_xattr": + sb.defaultMountOptions.userspaceExtendedAttributes = true + case "acl": + sb.defaultMountOptions.posixACLs = true + default: + return fmt.Errorf("unknown mount option %s", option) + } + } + return nil + }, + "Filesystem features": func(sb *superblock, value string) error { + features := strings.Split(value, " ") + for _, feature := range features { + switch feature { + case "has_journal": + sb.features.hasJournal = true + case "ext_attr": + sb.features.extendedAttributes = true + case "resize_inode": + case "dir_index": + sb.features.directoryIndices = true 
+ case "orphan_file": + sb.features.orphanFile = true + case "filetype": + sb.features.directoryEntriesRecordFileType = true + case "extent": + sb.features.extents = true + case "64bit": + sb.features.fs64Bit = true + case "flex_bg": + sb.features.flexBlockGroups = true + case "metadata_csum_seed": + sb.features.metadataChecksumSeedInSuperblock = true + case "sparse_super": + sb.features.sparseSuperblock = true + case "large_file": + sb.features.largeFile = true + case "huge_file": + sb.features.hugeFile = true + case "dir_nlink": + sb.features.largeSubdirectoryCount = true + case "extra_isize": + sb.features.largeInodes = true + case "metadata_csum": + sb.features.metadataChecksums = true + default: + return fmt.Errorf("unknown feature %s", feature) + } + } + return nil + }, +} + +func testGetValidSuperblockAndGDTs() (sb *superblock, gd []groupDescriptor, superblockBytes, gdtBytes []byte, err error) { + // get the raw bytes + superblockBytes, err = os.ReadFile(testSuperblockFile) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to read %s", testSuperblockFile) + } + + gdtBytes, err = os.ReadFile(testGDTFile) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to read %s", testGDTFile) + } + + // get the info for the superblock + stats, err := os.ReadFile(testFilesystemStats) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to read %s", testFilesystemStats) + } + // parse the stats + sb = &superblock{} + var ( + descs []groupDescriptor + inGroups bool + currentGroup *groupDescriptor + ) + scanner := bufio.NewScanner(bytes.NewReader(stats)) + for scanner.Scan() { + line := scanner.Text() + if !inGroups { + parts := strings.SplitN(line, ":", 2) + if len(parts) < 2 { + continue + } + if parts[0] == "Group 0" { + inGroups = true + } else { + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + if fn, ok := testSuperblockFuncs[key]; ok { + if err := fn(sb, value); err != nil { + return nil, nil, nil, 
nil, fmt.Errorf("failed to parse %s: %v", key, err) + } + } + continue + } + } + // we are in groups, so parse group section + for i, gdtLine := range gdtRELines { + matches := gdtLine.re.FindStringSubmatch(line) + if len(matches) > 0 { + if i == 0 { + // this is the first line, so we need to save the previous group + if currentGroup != nil { + descs = append(descs, *currentGroup) + } + currentGroup = &groupDescriptor{size: 64} + } + if gdtLine.handler != nil { + if err := gdtLine.handler(currentGroup, matches); err != nil { + return nil, nil, nil, nil, fmt.Errorf("failed to parse group descriptor line %d: %w", i, err) + } + } + // it matched one line, so do not go on to the next + break + } + } + } + // these have been fixed. If they ever change, we will need to modify here. + sb.errorFirstTime = time.Unix(0, 0).UTC() + sb.errorLastTime = time.Unix(0, 0).UTC() + juuid, err := uuid.FromBytes([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("Failed to parse journal UUID: %v", err) + } + sb.journalSuperblockUUID = &juuid + sb.clusterSize = 1 + + // this is a bit strange, but necessary. 
The totalKB written given by all tools round, just enough to make our calculations off + // so we will adjust the value of sb to match expected if it is within 1%; good enough + parsed, err := superblockFromBytes(superblockBytes) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("Failed to parse superblock bytes: %w", err) + } + + sbKBWritten := float64(sb.totalKBWritten) + parsedKBWritten := float64(parsed.totalKBWritten) + KBdiff := math.Abs(parsedKBWritten - sbKBWritten) + if KBdiff/sbKBWritten < 0.01 { + sb.totalKBWritten = parsed.totalKBWritten + } + + return sb, descs, superblockBytes, gdtBytes[:64*len(descs)], nil +} + +func testDirEntriesFromDebugFS(file string) (dirEntries []*directoryEntry, err error) { + dirInfo, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("Error opening directory info file %s: %w", dirInfo, err) + } + scanner := bufio.NewScanner(bytes.NewReader(dirInfo)) + for scanner.Scan() { + tokens := strings.Fields(scanner.Text()) + if len(tokens) < 9 { + continue + } + inodeStr := tokens[0] + filename := tokens[8] + fileTypeStr := tokens[2] + // remove the ( ) from the fileType + fileTypeStr = strings.TrimPrefix(fileTypeStr, "(") + fileTypeStr = strings.TrimSuffix(fileTypeStr, ")") + inode, err := strconv.ParseUint(inodeStr, 10, 32) + if err != nil { + return nil, fmt.Errorf("error parsing inode number %s: %w", inodeStr, err) + } + fileType, err := strconv.ParseUint(fileTypeStr, 10, 8) + if err != nil { + return nil, fmt.Errorf("error parsing file type %s: %w", fileTypeStr, err) + } + dirEntries = append(dirEntries, &directoryEntry{inode: uint32(inode), filename: filename, fileType: directoryFileType(fileType)}) + } + return dirEntries, nil +} diff --git a/filesystem/ext4/crc/crc16.go b/filesystem/ext4/crc/crc16.go new file mode 100644 index 00000000..b8c37882 --- /dev/null +++ b/filesystem/ext4/crc/crc16.go @@ -0,0 +1,44 @@ +package crc + +var crc16tab = [256]uint16{ + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 
0x50a5, 0x60c6, 0x70e7, + 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, + 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, + 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, + 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, + 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, + 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, + 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, + 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, + 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, + 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, + 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, + 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, + 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, + 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, + 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, + 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, + 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, + 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, + 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, + 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, + 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 
0x9ff8, + 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0} + +func CRC16(crc uint16, bs []byte) uint16 { + l := len(bs) + for i := 0; i < l; i++ { + crc = ((crc << 8) & 0xff00) ^ crc16tab[((crc>>8)&0xff)^uint16(bs[i])] + } + + return crc +} diff --git a/filesystem/ext4/crc/crc32.go b/filesystem/ext4/crc/crc32.go new file mode 100644 index 00000000..70b44b0e --- /dev/null +++ b/filesystem/ext4/crc/crc32.go @@ -0,0 +1,74 @@ +package crc + +import ( + "encoding/binary" + "hash/crc32" +) + +// Define the CRC32C table using the Castagnoli polynomial +var ( + crc32cTable = crc32.MakeTable(crc32.Castagnoli) + crc32cTables = generateTables(crc32cTable) +) + +func generateTables(poly *crc32.Table) [8][256]uint32 { + var tab [8][256]uint32 + tab[0] = *poly + + for i := 0; i < 256; i++ { + crc := tab[0][i] + for j := 1; j < 8; j++ { + crc = (crc >> 8) ^ tab[0][crc&0xff] + tab[j][i] = crc + } + } + + return tab +} + +func CRC32c(base uint32, b []byte) uint32 { + // Compute the CRC32C checksum + // for reasons unknown, the checksum from go package hash/crc32, using crc32.Update(), is different from the one calculated by the kernel + // so we use this + return crc32Body(base, b, &crc32cTables) +} + +// doCRC processes a single byte +func doCRC(crc uint32, x byte, tab *[256]uint32) uint32 { + return tab[(crc^uint32(x))&0xff] ^ (crc >> 8) +} + +// doCRC4 processes 4 bytes +func doCRC4(q uint32, tab *[8][256]uint32) uint32 { + return tab[3][q&0xff] ^ tab[2][(q>>8)&0xff] ^ tab[1][(q>>16)&0xff] ^ tab[0][(q>>24)&0xff] +} + +// doCRC8 processes 8 bytes +func doCRC8(q uint32, tab *[8][256]uint32) uint32 { + return tab[7][q&0xff] ^ tab[6][(q>>8)&0xff] ^ tab[5][(q>>16)&0xff] ^ tab[4][(q>>24)&0xff] +} + +func crc32Body(crc uint32, buf []byte, tab *[8][256]uint32) uint32 { + // Align it + for len(buf) > 0 && (uintptr(len(buf))&3) != 0 { + crc = doCRC(crc, buf[0], &tab[0]) + buf = buf[1:] + } + + // Process in chunks of 8 bytes + remLen := len(buf) % 8 + for len(buf) >= 8 { + 
q := crc ^ binary.LittleEndian.Uint32(buf[:4]) + crc = doCRC8(q, tab) + q = binary.LittleEndian.Uint32(buf[4:8]) + crc ^= doCRC4(q, tab) + buf = buf[8:] + } + + // Process remaining bytes + for _, b := range buf[:remLen] { + crc = doCRC(crc, b, &tab[0]) + } + + return crc +} diff --git a/filesystem/ext4/directory.go b/filesystem/ext4/directory.go new file mode 100644 index 00000000..80b19579 --- /dev/null +++ b/filesystem/ext4/directory.go @@ -0,0 +1,211 @@ +package ext4 + +import ( + "bytes" + "encoding/binary" + "fmt" +) + +const ( + directoryHashTreeRootMinSize = 0x28 + directoryHashTreeNodeMinSize = 0x12 +) + +// Directory represents a single directory in an ext4 filesystem +type Directory struct { + directoryEntry + root bool + entries []*directoryEntry +} + +// toBytes convert our entries to raw bytes. Provides checksum as well. Final returned byte slice will be a multiple of bytesPerBlock. +func (d *Directory) toBytes(bytesPerBlock uint32, checksumFunc checksumAppender) []byte { + b := make([]byte, 0) + var ( + previousLength int + previousEntry *directoryEntry + lastEntryCount int + block []byte + ) + if len(d.entries) == 0 { + return b + } + lastEntryCount = len(d.entries) - 1 + for i, de := range d.entries { + b2 := de.toBytes(0) + switch { + case len(block)+len(b2) > int(bytesPerBlock)-minDirEntryLength: + // if adding this one will go past the end of the block, pad out the previous + block = b[:len(block)-previousLength] + previousB := previousEntry.toBytes(uint16(int(bytesPerBlock) - len(block) - minDirEntryLength)) + block = append(block, previousB...) + // add the checksum + block = checksumFunc(block) + b = append(b, block...) + // start a new block + block = make([]byte, 0) + case i == lastEntryCount: + // if this is the last one, pad it out + b2 = de.toBytes(uint16(int(bytesPerBlock) - len(block) - minDirEntryLength)) + block = append(block, b2...) + // add the checksum + block = checksumFunc(block) + b = append(b, block...) 
+ // start a new block + block = make([]byte, 0) + default: + block = append(block, b2...) + } + previousLength = len(b2) + previousEntry = de + } + remainder := len(b) % int(bytesPerBlock) + if remainder > 0 { + extra := int(bytesPerBlock) - remainder + zeroes := make([]byte, extra) + b = append(b, zeroes...) + } + return b +} + +type directoryHashEntry struct { + hash uint32 + block uint32 +} + +type dxNode interface { + entries() []directoryHashEntry +} + +type directoryHashNode struct { + childEntries []directoryHashEntry +} + +func (d *directoryHashNode) entries() []directoryHashEntry { + return d.childEntries +} + +type directoryHashRoot struct { + inodeDir uint32 + inodeParent uint32 + hashVersion hashVersion + depth uint8 + hashAlgorithm hashAlgorithm + childEntries []directoryHashEntry + dotEntry *directoryEntry + dotDotEntry *directoryEntry +} + +func (d *directoryHashRoot) entries() []directoryHashEntry { + return d.childEntries +} + +// parseDirectoryTreeRoot parses the directory hash tree root from the given byte slice. Reads only the root node. 
+// parse the dx_root block at b per the ext4 htree layout: the fake "." and ".."
+// linear entries come first, then dx_root_info, then the hash->block map.
+func parseDirectoryTreeRoot(b []byte, largeDir bool) (node *directoryHashRoot, err error) {
+	// min size: dx_root is 0x28 bytes up to and including the first map entry
+	if len(b) < directoryHashTreeRootMinSize {
+		return nil, fmt.Errorf("directory hash tree root is too small")
+	}
+
+	// dot parameters: the root block begins with a literal "." directory entry
+	// (inode, rec_len, name_len, file_type, name), each field strictly validated
+	dotInode := binary.LittleEndian.Uint32(b[0x0:0x4])
+	dotSize := binary.LittleEndian.Uint16(b[0x4:0x6])
+	if dotSize != 12 {
+		return nil, fmt.Errorf("directory hash tree root dot size is %d and not 12", dotSize)
+	}
+	dotNameSize := b[0x6]
+	if dotNameSize != 1 {
+		return nil, fmt.Errorf("directory hash tree root dot name length is %d and not 1", dotNameSize)
+	}
+	dotFileType := directoryFileType(b[0x7])
+	if dotFileType != dirFileTypeDirectory {
+		return nil, fmt.Errorf("directory hash tree root dot file type is %d and not %v", dotFileType, dirFileTypeDirectory)
+	}
+	// name is "." padded with NULs to a 4-byte boundary
+	dotName := b[0x8:0xc]
+	if !bytes.Equal(dotName, []byte{'.', 0, 0, 0}) {
+		return nil, fmt.Errorf("directory hash tree root dot name is %s and not '.'", dotName)
+	}
+
+	// dotdot parameters: a literal ".." entry follows at 0xc
+	// NOTE(review): the ".." rec_len at b[0x10:0x12] is read past but never
+	// validated, unlike the "." rec_len above — confirm that is intentional
+	dotdotInode := binary.LittleEndian.Uint32(b[0xc:0x10])
+	dotdotNameSize := b[0x12]
+	if dotdotNameSize != 2 {
+		return nil, fmt.Errorf("directory hash tree root dotdot name length is %d and not 2", dotdotNameSize)
+	}
+	dotdotFileType := directoryFileType(b[0x13])
+	if dotdotFileType != dirFileTypeDirectory {
+		return nil, fmt.Errorf("directory hash tree root dotdot file type is %d and not %v", dotdotFileType, dirFileTypeDirectory)
+	}
+	dotdotName := b[0x14:0x18]
+	if !bytes.Equal(dotdotName, []byte{'.', '.', 0, 0}) {
+		return nil, fmt.Errorf("directory hash tree root dotdot name is %s and not '..'", dotdotName)
+	}
+
+	// dx_root_info: b[0x1d] is the info length, which is fixed at 8 bytes
+	treeInformation := b[0x1d]
+	if treeInformation != 8 {
+		return nil, fmt.Errorf("directory hash tree root tree information is %d and not 8", treeInformation)
+	}
+	treeDepth := b[0x1e]
+	// there are maximums for this: 2 levels normally, 3 with the large_dir feature
+	maxTreeDepth := uint8(2)
+	if largeDir {
+		maxTreeDepth = 3
+	}
+	if treeDepth > maxTreeDepth {
+		return nil, fmt.Errorf("directory hash tree root tree depth is %d and not between 0 and %d", treeDepth, maxTreeDepth)
+	}
+
+	// number of dx entries in use, including the implicit zero-hash entry in dx_root
+	dxEntriesCount := binary.LittleEndian.Uint16(b[0x22:0x24])
+
+	node = &directoryHashRoot{
+		inodeDir:      binary.LittleEndian.Uint32(b[0x0:0x4]),
+		inodeParent:   binary.LittleEndian.Uint32(b[0xC:0x10]),
+		hashAlgorithm: hashAlgorithm(b[0x1c]), // what hashing algorithm is used?
+		// NOTE(review): the hashVersion field of directoryHashRoot is never
+		// populated here; b[0x1c] is stored only as hashAlgorithm — confirm
+		depth:        treeDepth,
+		childEntries: make([]directoryHashEntry, 0, int(dxEntriesCount)),
+		dotEntry: &directoryEntry{
+			inode:    dotInode,
+			fileType: dotFileType,
+			filename: ".",
+		},
+		dotDotEntry: &directoryEntry{
+			inode:    dotdotInode,
+			fileType: dotdotFileType,
+			filename: "..",
+		},
+	}
+
+	// remove 1, because the count includes the one in the dx_root itself:
+	// the first map entry has an implicit hash of 0 and only stores its block
+	node.childEntries = append(node.childEntries, directoryHashEntry{hash: 0, block: binary.LittleEndian.Uint32(b[0x24:0x28])})
+	for i := 0; i < int(dxEntriesCount)-1; i++ {
+		// remaining entries are explicit (hash, block) pairs of 8 bytes each
+		entryOffset := 0x28 + (i * 8)
+		hash := binary.LittleEndian.Uint32(b[entryOffset : entryOffset+4])
+		block := binary.LittleEndian.Uint32(b[entryOffset+4 : entryOffset+8])
+		node.childEntries = append(node.childEntries, directoryHashEntry{hash: hash, block: block})
+	}
+
+	return node, nil
+}
+
+// parseDirectoryTreeNode parses an internal directory hash tree node from the given byte slice. Reads only the node.
+func parseDirectoryTreeNode(b []byte) (node *directoryHashNode, err error) { + // min size + if len(b) < directoryHashTreeNodeMinSize { + return nil, fmt.Errorf("directory hash tree root is too small") + } + + dxEntriesCount := binary.LittleEndian.Uint16(b[0xa:0xc]) + + node = &directoryHashNode{ + childEntries: make([]directoryHashEntry, 0, int(dxEntriesCount)), + } + node.childEntries = append(node.childEntries, directoryHashEntry{hash: 0, block: binary.LittleEndian.Uint32(b[0xc:0x10])}) + for i := 0; i < int(dxEntriesCount)-1; i++ { + entryOffset := 0x10 + (i * 8) + hash := binary.LittleEndian.Uint32(b[entryOffset : entryOffset+4]) + block := binary.LittleEndian.Uint32(b[entryOffset+4 : entryOffset+8]) + node.childEntries = append(node.childEntries, directoryHashEntry{hash: hash, block: block}) + } + + return node, nil +} diff --git a/filesystem/ext4/directory_test.go b/filesystem/ext4/directory_test.go new file mode 100644 index 00000000..80f52482 --- /dev/null +++ b/filesystem/ext4/directory_test.go @@ -0,0 +1,24 @@ +package ext4 + +import ( + "testing" +) + +func TestDirectoryToBytes(t *testing.T) { + dir, bytesPerBlock, expected, err := testGetValidRootDirectory() + if err != nil { + t.Fatal(err) + } + //nolint:dogsled // we know and we do not care + sb, _, _, _, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatal(err) + } + b := dir.toBytes(bytesPerBlock, directoryChecksumAppender(sb.checksumSeed, 2, 0)) + + // read the bytes from the disk + diff, diffString := dumpByteSlicesWithDiffs(b, expected, 32, false, true, true) + if diff { + t.Errorf("directory.toBytes() mismatched, actual then expected\n%s", diffString) + } +} diff --git a/filesystem/ext4/directoryentry.go b/filesystem/ext4/directoryentry.go new file mode 100644 index 00000000..29546934 --- /dev/null +++ b/filesystem/ext4/directoryentry.go @@ -0,0 +1,176 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" +) + +// directoryFileType uses different constants than the file type 
property in the inode +type directoryFileType uint8 + +const ( + minDirEntryLength int = 12 // actually 9 for 1-byte file length, but must be multiple of 4 bytes + maxDirEntryLength int = 263 + + // directory file types + dirFileTypeUnknown directoryFileType = 0x0 + dirFileTypeRegular directoryFileType = 0x1 + dirFileTypeDirectory directoryFileType = 0x2 + dirFileTypeCharacter directoryFileType = 0x3 + dirFileTypeBlock directoryFileType = 0x4 + dirFileTypeFifo directoryFileType = 0x5 + dirFileTypeSocket directoryFileType = 0x6 + dirFileTypeSymlink directoryFileType = 0x7 +) + +// directoryEntry is a single directory entry +type directoryEntry struct { + inode uint32 + filename string + fileType directoryFileType +} + +func (de *directoryEntry) equal(other *directoryEntry) bool { + return de.inode == other.inode && de.filename == other.filename && de.fileType == other.fileType +} + +func directoryEntryFromBytes(b []byte) (*directoryEntry, error) { + if len(b) < minDirEntryLength { + return nil, fmt.Errorf("directory entry of length %d is less than minimum %d", len(b), minDirEntryLength) + } + if len(b) > maxDirEntryLength { + b = b[:maxDirEntryLength] + } + + //nolint:gocritic // keep this here for future reference + // length := binary.LittleEndian.Uint16(b[0x4:0x6]) + nameLength := b[0x6] + name := b[0x8 : 0x8+nameLength] + de := directoryEntry{ + inode: binary.LittleEndian.Uint32(b[0x0:0x4]), + fileType: directoryFileType(b[0x7]), + filename: string(name), + } + return &de, nil +} + +func directoryEntriesChecksumFromBytes(b []byte) (checksum uint32, err error) { + if len(b) != minDirEntryLength { + return checksum, fmt.Errorf("directory entry checksum of length %d is not required %d", len(b), minDirEntryLength) + } + inode := binary.LittleEndian.Uint32(b[0x0:0x4]) + if inode != 0 { + return checksum, fmt.Errorf("directory entry checksum inode is not 0") + } + length := binary.LittleEndian.Uint16(b[0x4:0x6]) + if int(length) != minDirEntryLength { + return 
checksum, fmt.Errorf("directory entry checksum length is not %d", minDirEntryLength) + } + nameLength := b[0x6] + if nameLength != 0 { + return checksum, fmt.Errorf("directory entry checksum name length is not 0") + } + fileType := b[0x7] + if fileType != 0xde { + return checksum, fmt.Errorf("directory entry checksum file type is not set to reserved 0xde") + } + return binary.LittleEndian.Uint32(b[0x8:0xc]), nil +} + +// toBytes convert a directoryEntry to bytes. If isLast, then the size recorded is the number of bytes +// from beginning of directory entry to end of block, minus the amount left for the checksum. +func (de *directoryEntry) toBytes(withSize uint16) []byte { + // it must be the header length + filename length rounded up to nearest multiple of 4 + nameLength := uint8(len(de.filename)) + entryLength := uint16(nameLength) + 8 + if leftover := entryLength % 4; leftover > 0 { + entryLength += (4 - leftover) + } + + if withSize > 0 { + entryLength = withSize + } + b := make([]byte, entryLength) + binary.LittleEndian.PutUint32(b[0x0:0x4], de.inode) + binary.LittleEndian.PutUint16(b[0x4:0x6], entryLength) + b[0x6] = nameLength + b[0x7] = byte(de.fileType) + copy(b[0x8:], de.filename) + + return b +} + +func parseDirEntriesLinear(b []byte, withChecksums bool, blocksize, inodeNumber, inodeGeneration, checksumSeed uint32) ([]*directoryEntry, error) { + // checksum if needed + if withChecksums { + var ( + newb []byte + checksumEntryOffset = int(blocksize) - minDirEntryLength + checksumOffset = int(blocksize) - 4 + ) + checksummer := directoryChecksummer(checksumSeed, inodeNumber, inodeGeneration) + for i := 0; i < len(b); i += int(blocksize) { + block := b[i : i+int(blocksize)] + inBlockChecksum := block[checksumOffset:] + block = block[:checksumEntryOffset] + // save everything except the checksum + newb = append(newb, block...) 
+ // checksum the entire block + checksumValue := binary.LittleEndian.Uint32(inBlockChecksum) + // checksum the block + actualChecksum := checksummer(block) + if actualChecksum != checksumValue { + return nil, fmt.Errorf("directory block checksum mismatch: expected %x, got %x", checksumValue, actualChecksum) + } + } + b = newb + } + + // convert into directory entries + entries := make([]*directoryEntry, 0, 4) + count := 0 + for i := 0; i < len(b); count++ { + // read the length of the entry + length := binary.LittleEndian.Uint16(b[i+0x4 : i+0x6]) + de, err := directoryEntryFromBytes(b[i : i+int(length)]) + if err != nil { + return nil, fmt.Errorf("failed to parse directory entry %d: %v", count, err) + } + entries = append(entries, de) + i += int(length) + } + return entries, nil +} + +// parseDirEntriesHashed parse hashed data blocks to get directory entries. +// If hashedName is 0, returns all directory entries; otherwise, returns a slice with a single entry with the given name. +func parseDirEntriesHashed(b []byte, depth uint8, node dxNode, blocksize uint32, withChecksums bool, inodeNumber, inodeGeneration, checksumSeed uint32) (dirEntries []*directoryEntry, err error) { + for _, entry := range node.entries() { + var ( + addDirEntries []*directoryEntry + start = entry.block * blocksize + end = start + blocksize + ) + + nextBlock := b[start:end] + if depth == 0 { + addDirEntries, err = parseDirEntriesLinear(nextBlock, withChecksums, blocksize, inodeNumber, inodeGeneration, checksumSeed) + if err != nil { + return nil, fmt.Errorf("error parsing linear directory entries: %w", err) + } + } else { + // recursively parse the next level of the tree + // read the next level down + node, err := parseDirectoryTreeNode(nextBlock) + if err != nil { + return nil, fmt.Errorf("error parsing directory tree node: %w", err) + } + addDirEntries, err = parseDirEntriesHashed(b, depth-1, node, blocksize, withChecksums, inodeNumber, inodeGeneration, checksumSeed) + if err != nil { + 
return nil, fmt.Errorf("error parsing hashed directory entries: %w", err) + } + } + dirEntries = append(dirEntries, addDirEntries...) + } + return dirEntries, nil +} diff --git a/filesystem/ext4/directoryentry_test.go b/filesystem/ext4/directoryentry_test.go new file mode 100644 index 00000000..03e5a3a8 --- /dev/null +++ b/filesystem/ext4/directoryentry_test.go @@ -0,0 +1,24 @@ +package ext4 + +import ( + "testing" + + "github.com/go-test/deep" +) + +func TestDirectoryEntriesFromBytes(t *testing.T) { + expected, blocksize, b, err := testGetValidRootDirectory() + if err != nil { + t.Fatal(err) + } + // remove checksums, as we are not testing those here + b = b[:len(b)-minDirEntryLength] + entries, err := parseDirEntriesLinear(b, false, blocksize, 2, 0, 0) + if err != nil { + t.Fatalf("Failed to parse directory entries: %v", err) + } + deep.CompareUnexportedFields = true + if diff := deep.Equal(expected.entries, entries); diff != nil { + t.Errorf("directoryFromBytes() = %v", diff) + } +} diff --git a/filesystem/ext4/dirhash.go b/filesystem/ext4/dirhash.go new file mode 100644 index 00000000..8717c341 --- /dev/null +++ b/filesystem/ext4/dirhash.go @@ -0,0 +1,157 @@ +package ext4 + +import ( + "github.com/diskfs/go-diskfs/filesystem/ext4/md4" +) + +const ( + teaDelta uint32 = 0x9E3779B9 + k1 uint32 = 0 + k2 uint32 = 0o13240474631 + k3 uint32 = 0o15666365641 + ext4HtreeEOF32 uint32 = ((1 << (32 - 1)) - 1) + ext4HtreeEOF64 uint64 = ((1 << (64 - 1)) - 1) +) + +type hashVersion uint8 + +const ( + HashVersionLegacy = 0 + HashVersionHalfMD4 = 1 + HashVersionTEA = 2 + HashVersionLegacyUnsigned = 3 + HashVersionHalfMD4Unsigned = 4 + HashVersionTEAUnsigned = 5 + HashVersionSIP = 6 +) + +func TEATransform(buf [4]uint32, in []uint32) [4]uint32 { + var sum uint32 + var b0, b1 = buf[0], buf[1] + var a, b, c, d = in[0], in[1], in[2], in[3] + var n = 16 + + for ; n > 0; n-- { + sum += teaDelta + b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b) + b1 += ((b0 << 4) + c) ^ (b0 + sum) 
^ ((b0 >> 5) + d) + } + + buf[0] += b0 + buf[1] += b1 + return buf +} + +// the old legacy hash +// +//nolint:unparam,revive // we do not used signed, but we probably should, so leaving until we are sure +func dxHackHash(name string, signed bool) uint32 { + var hash uint32 + var hash0, hash1 uint32 = 0x12a3fe2d, 0x37abe8f9 + b := []byte(name) + + for i := len(b); i > 0; i-- { + // get the specific character + c := int(b[i-1]) + // the value of the individual character depends on if it is signed or not + hash = hash1 + (hash0 ^ uint32(c*7152373)) + + if hash&0x80000000 != 0 { + hash -= 0x7fffffff + } + hash1 = hash0 + hash0 = hash + } + return hash0 << 1 +} + +//nolint:unparam,revive // we do not used signed, but we probably should, so leaving until we are sure +func str2hashbuf(msg string, num int, signed bool) []uint32 { + var buf [8]uint32 + var pad, val uint32 + b := []byte(msg) + size := len(b) + + pad = uint32(size) | (uint32(size) << 8) + pad |= pad << 16 + + val = pad + if size > num*4 { + size = num * 4 + } + var j int + for i := 0; i < size; i++ { + c := int(b[i]) + val = uint32(c) + (val << 8) + if (i % 4) == 3 { + buf[j] = val + val = pad + num-- + j++ + } + } + num-- + if num >= 0 { + buf[j] = val + j++ + } + for num--; num >= 0; num-- { + buf[j] = pad + j++ + } + return buf[:] +} + +func ext4fsDirhash(name string, version hashVersion, seed []uint32) (hash, minorHash uint32) { + /* Initialize the default seed for the hash checksum functions */ + var buf = [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476} + + // Check to see if the seed is all zero, and if so, use the default + for i, val := range seed { + if val != 0 { + buf[i] = val + } + } + + switch version { + case HashVersionLegacyUnsigned: + hash = dxHackHash(name, false) + case HashVersionLegacy: + hash = dxHackHash(name, true) + case HashVersionHalfMD4Unsigned: + for i := 0; i < len(name); i += 32 { + in := str2hashbuf(name[i:], 8, false) + buf[1] = md4.HalfMD4Transform(buf, in) + } + 
minorHash = buf[2] + hash = buf[1] + case HashVersionHalfMD4: + for i := 0; i < len(name); i += 32 { + in := str2hashbuf(name[i:], 8, true) + buf[1] = md4.HalfMD4Transform(buf, in) + } + minorHash = buf[2] + hash = buf[1] + case HashVersionTEAUnsigned: + for i := 0; i < len(name); i += 16 { + in := str2hashbuf(name[i:], 4, false) + buf = TEATransform(buf, in) + } + hash = buf[0] + minorHash = buf[1] + case HashVersionTEA: + for i := 0; i < len(name); i += 16 { + in := str2hashbuf(name[i:], 4, true) + buf = TEATransform(buf, in) + } + hash = buf[0] + minorHash = buf[1] + default: + return 0, 0 + } + hash &= ^uint32(1) + if hash == (ext4HtreeEOF32 << 1) { + hash = (ext4HtreeEOF32 - 1) << 1 + } + return hash, minorHash +} diff --git a/filesystem/ext4/dirhash_test.go b/filesystem/ext4/dirhash_test.go new file mode 100644 index 00000000..7299535a --- /dev/null +++ b/filesystem/ext4/dirhash_test.go @@ -0,0 +1 @@ +package ext4 diff --git a/filesystem/ext4/ext4.go b/filesystem/ext4/ext4.go new file mode 100644 index 00000000..df7121a9 --- /dev/null +++ b/filesystem/ext4/ext4.go @@ -0,0 +1,1084 @@ +package ext4 + +import ( + "encoding/binary" + "errors" + "fmt" + "math" + "os" + "path" + "strings" + "time" + + "github.com/diskfs/go-diskfs/filesystem" + "github.com/diskfs/go-diskfs/filesystem/ext4/crc" + "github.com/diskfs/go-diskfs/util" + uuid "github.com/satori/go.uuid" +) + +// SectorSize indicates what the sector size in bytes is +type SectorSize uint16 + +// BlockSize indicates how many sectors are in a block +type BlockSize uint8 + +// BlockGroupSize indicates how many blocks are in a group, standardly 8*block_size_in_bytes + +const ( + // SectorSize512 is a sector size of 512 bytes, used as the logical size for all ext4 filesystems + SectorSize512 SectorSize = 512 + minBlocksPerGroup uint32 = 256 + BootSectorSize SectorSize = 2 * SectorSize512 + SuperblockSize SectorSize = 2 * SectorSize512 + BlockGroupFactor int = 8 + DefaultInodeRatio int64 = 8192 + 
DefaultInodeSize int64 = 256 + DefaultReservedBlocksPercent uint8 = 5 + DefaultVolumeName = "diskfs_ext4" + minClusterSize int = 128 + maxClusterSize int = 65529 + bytesPerSlot int = 32 + maxCharsLongFilename int = 13 + maxBlocksPerExtent int = 32768 + million int = 1000000 + billion int = 1000 * million + firstNonReservedInode uint32 = 11 // traditional + + minBlockLogSize int = 10 /* 1024 */ + maxBlockLogSize int = 16 /* 65536 */ + minBlockSize int = (1 << minBlockLogSize) + maxBlockSize int = (1 << maxBlockLogSize) + + max32Num uint64 = math.MaxUint32 + max64Num uint64 = math.MaxUint64 + + maxFilesystemSize32Bit uint64 = 16*2 ^ 40 + maxFilesystemSize64Bit uint64 = 1*2 ^ 60 + + checksumType uint8 = 1 + + // default for log groups per flex group + defaultLogGroupsPerFlex int = 3 + + // fixed inodes + rootInode uint32 = 2 + userQuotaInode uint32 = 3 + groupQuotaInode uint32 = 4 + journalInode uint32 = 8 + lostFoundInode = 11 // traditional +) + +type Params struct { + UUID *uuid.UUID + SectorsPerBlock uint8 + BlocksPerGroup uint32 + InodeRatio int64 + InodeCount uint32 + SparseSuperVersion uint8 + Checksum bool + ClusterSize int64 + ReservedBlocksPercent uint8 + VolumeName string + // JournalDevice external journal device, only checked if WithFeatureSeparateJournalDevice(true) is set + JournalDevice string + LogFlexBlockGroups int + Features []FeatureOpt + DefaultMountOpts []MountOpt +} + +// FileSystem implememnts the FileSystem interface +type FileSystem struct { + bootSector []byte + superblock *superblock + groupDescriptors *groupDescriptors + dataBlockBitmap bitmap + inodeBitmap bitmap + blockGroups int64 + size int64 + start int64 + file util.File +} + +// Equal compare if two filesystems are equal +func (fs *FileSystem) Equal(a *FileSystem) bool { + localMatch := fs.file == a.file + sbMatch := fs.superblock.equal(a.superblock) + gdMatch := fs.groupDescriptors.equal(a.groupDescriptors) + return localMatch && sbMatch && gdMatch +} + +// Create creates an ext4 
filesystem in a given file or device +// +// requires the util.File where to create the filesystem, size is the size of the filesystem in bytes, +// start is how far in bytes from the beginning of the util.File to create the filesystem, +// and blocksize is is the logical blocksize to use for creating the filesystem +// +// note that you are *not* required to create the filesystem on the entire disk. You could have a disk of size +// 20GB, and create a small filesystem of size 50MB that begins 2GB into the disk. +// This is extremely useful for creating filesystems on disk partitions. +// +// Note, however, that it is much easier to do this using the higher-level APIs at github.com/diskfs/go-diskfs +// which allow you to work directly with partitions, rather than having to calculate (and hopefully not make any errors) +// where a partition starts and ends. +// +// If the provided blocksize is 0, it will use the default of 512 bytes. If it is any number other than 0 +// or 512, it will return an error. 
+// +//nolint:gocyclo // yes, this has high cyclomatic complexity, but we can accept it +func Create(f util.File, size, start, sectorsize int64, p *Params) (*FileSystem, error) { + // be safe about the params pointer + if p == nil { + p = &Params{} + } + + // sectorsize must be <=0 or exactly SectorSize512 or error + // because of this, we know we can scale it down to a uint32, since it only can be 512 bytes + if sectorsize != int64(SectorSize512) && sectorsize > 0 { + return nil, fmt.Errorf("sectorsize for ext4 must be either 512 bytes or 0, not %d", sectorsize) + } + var sectorsize32 = uint32(sectorsize) + // there almost are no limits on an ext4 fs - theoretically up to 1 YB + // but we do have to check the max and min size per the requested parameters + // if size < minSizeGivenParameters { + // return nil, fmt.Errorf("requested size is smaller than minimum allowed ext4 size %d for given parameters", minSizeGivenParameters*4) + // } + // if size > maxSizeGivenParameters { + // return nil, fmt.Errorf("requested size is bigger than maximum ext4 size %d for given parameters", maxSizeGivenParameters*4) + // } + + // uuid + fsuuid := p.UUID + if fsuuid == nil { + fsuuid2 := uuid.NewV4() + fsuuid = &fsuuid2 + } + + // blocksize + sectorsPerBlock := p.SectorsPerBlock + userProvidedBlocksize := false + switch { + case sectorsPerBlock > 128 || sectorsPerBlock < 2: + return nil, fmt.Errorf("invalid sectors per block %d, must be between %d and %d sectors", sectorsPerBlock, 2, 128) + case sectorsPerBlock < 1: + sectorsPerBlock = 2 + default: + userProvidedBlocksize = true + } + blocksize := uint32(sectorsPerBlock) * sectorsize32 + + // how many whole blocks is that? 
+ numblocks := size / int64(blocksize) + + // recalculate if it was not user provided + if !userProvidedBlocksize { + sectorsPerBlockR, blocksizeR, numblocksR := recalculateBlocksize(numblocks, size) + _, blocksize, numblocks = uint8(sectorsPerBlockR), blocksizeR, numblocksR + } + + // how many blocks in each block group (and therefore how many block groups) + // if not provided, by default it is 8*blocksize (in bytes) + blocksPerGroup := p.BlocksPerGroup + switch { + case blocksPerGroup <= 0: + blocksPerGroup = blocksize * 8 + case blocksPerGroup < minBlocksPerGroup: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be at least %d", blocksPerGroup, minBlocksPerGroup) + case blocksPerGroup > 8*blocksize: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be no larger than 8*blocksize of %d", blocksPerGroup, blocksize) + case blocksPerGroup%8 != 0: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be divisible by 8", blocksPerGroup) + } + + // how many block groups do we have? 
+ blockGroups := numblocks / int64(blocksPerGroup) + + // track how many free blocks we have + freeBlocks := numblocks + + clusterSize := p.ClusterSize + + // use our inode ratio to determine how many inodes we should have + inodeRatio := p.InodeRatio + if inodeRatio <= 0 { + inodeRatio = DefaultInodeRatio + } + if inodeRatio < int64(blocksize) { + inodeRatio = int64(blocksize) + } + if inodeRatio < clusterSize { + inodeRatio = clusterSize + } + + inodeCount := p.InodeCount + switch { + case inodeCount <= 0: + // calculate how many inodes are needed + inodeCount64 := (numblocks * int64(blocksize)) / inodeRatio + if uint64(inodeCount64) > max32Num { + return nil, fmt.Errorf("requested %d inodes, greater than max %d", inodeCount64, max32Num) + } + inodeCount = uint32(inodeCount64) + case uint64(inodeCount) > max32Num: + return nil, fmt.Errorf("requested %d inodes, greater than max %d", inodeCount, max32Num) + } + + inodesPerGroup := int64(inodeCount) / blockGroups + + // track how many free inodes we have + freeInodes := inodeCount + + // which blocks have superblock and GDT? + var ( + backupSuperblocks []int64 + backupSuperblockGroupsSparse [2]uint32 + ) + // 0 - primary + // ?? - backups + switch p.SparseSuperVersion { + case 2: + // backups in first and last block group + backupSuperblockGroupsSparse = [2]uint32{0, uint32(blockGroups) - 1} + backupSuperblocks = []int64{0, 1, blockGroups - 1} + default: + backupSuperblockGroups := calculateBackupSuperblockGroups(blockGroups) + backupSuperblocks = []int64{0} + for _, bg := range backupSuperblockGroups { + backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)) + } + } + + freeBlocks -= int64(len(backupSuperblocks)) + + var firstDataBlock uint32 + if blocksize == 1024 { + firstDataBlock = 1 + } + + /* + size calculations + we have the total size of the disk from `size uint64` + we have the sectorsize fixed at SectorSize512 + + what do we need to determine or calculate? 
+ - block size + - number of blocks + - number of block groups + - block groups for superblock and gdt backups + - in each block group: + - number of blocks in gdt + - number of reserved blocks in gdt + - number of blocks in inode table + - number of data blocks + + config info: + + [defaults] + base_features = sparse_super,large_file,filetype,resize_inode,dir_index,ext_attr + default_mntopts = acl,user_xattr + enable_periodic_fsck = 0 + blocksize = 4096 + inode_size = 256 + inode_ratio = 16384 + + [fs_types] + ext3 = { + features = has_journal + } + ext4 = { + features = has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize + inode_size = 256 + } + ext4dev = { + features = has_journal,extent,huge_file,flex_bg,uninit_bg,inline_data,64bit,dir_nlink,extra_isize + inode_size = 256 + options = test_fs=1 + } + small = { + blocksize = 1024 + inode_size = 128 + inode_ratio = 4096 + } + floppy = { + blocksize = 1024 + inode_size = 128 + inode_ratio = 8192 + } + big = { + inode_ratio = 32768 + } + huge = { + inode_ratio = 65536 + } + news = { + inode_ratio = 4096 + } + largefile = { + inode_ratio = 1048576 + blocksize = -1 + } + largefile4 = { + inode_ratio = 4194304 + blocksize = -1 + } + hurd = { + blocksize = 4096 + inode_size = 128 + } + */ + + // allocate root directory, single inode + freeInodes-- + + // how many reserved blocks? + reservedBlocksPercent := p.ReservedBlocksPercent + if reservedBlocksPercent <= 0 { + reservedBlocksPercent = DefaultReservedBlocksPercent + } + + // are checksums enabled? 
+ gdtChecksumType := gdtChecksumNone + if p.Checksum { + gdtChecksumType = gdtChecksumMetadata + } + + // we do not yet support bigalloc + var clustersPerGroup = blocksPerGroup + + // inodesPerGroup: once we know how many inodes per group, and how many groups + // we will have the total inode count + + volumeName := p.VolumeName + if volumeName == "" { + volumeName = DefaultVolumeName + } + + fflags := defaultFeatureFlags + for _, flagopt := range p.Features { + flagopt(&fflags) + } + + mflags := defaultMiscFlags + + // generate hash seed + hashSeed := uuid.NewV4() + hashSeedBytes := hashSeed.Bytes() + htreeSeed := make([]uint32, 0, 4) + htreeSeed = append(htreeSeed, + binary.LittleEndian.Uint32(hashSeedBytes[:4]), + binary.LittleEndian.Uint32(hashSeedBytes[4:8]), + binary.LittleEndian.Uint32(hashSeedBytes[8:12]), + binary.LittleEndian.Uint32(hashSeedBytes[12:16]), + ) + + // create a UUID for the journal + journalSuperblockUUID := uuid.NewV4() + + // group descriptor size could be 32 or 64, depending on option + var gdSize uint16 + if fflags.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + + var firstMetaBG uint32 + if fflags.metaBlockGroups { + return nil, fmt.Errorf("meta block groups not yet supported") + } + + // calculate the maximum number of block groups + // maxBlockGroups = (maxFSSize) / (blocksPerGroup * blocksize) + var ( + maxBlockGroups uint64 + ) + if fflags.fs64Bit { + maxBlockGroups = maxFilesystemSize64Bit / (uint64(blocksPerGroup) * uint64(blocksize)) + } else { + maxBlockGroups = maxFilesystemSize32Bit / (uint64(blocksPerGroup) * uint64(blocksize)) + } + reservedGDTBlocks := maxBlockGroups * 32 / maxBlockGroups + if reservedGDTBlocks > math.MaxUint16 { + return nil, fmt.Errorf("too many reserved blocks calculated for group descriptor table") + } + + var ( + journalDeviceNumber uint32 + err error + ) + if fflags.separateJournalDevice && p.JournalDevice != "" { + journalDeviceNumber, err = journalDevice(p.JournalDevice) + if err != nil { + 
return nil, fmt.Errorf("unable to get journal device: %w", err) + } + } + + // get default mount options + mountOptions := defaultMountOptionsFromOpts(p.DefaultMountOpts) + + // initial KB written. This must be adjusted over time to include: + // - superblock itself (1KB bytes) + // - GDT + // - block bitmap (1KB per block group) + // - inode bitmap (1KB per block group) + // - inode tables (inodes per block group * bytes per inode) + // - root directory + + // for now, we just make it 1024 = 1 KB + initialKB := 1024 + + // only set a project quota inode if the feature was enabled + var projectQuotaInode uint32 + if fflags.projectQuotas { + projectQuotaInode = lostFoundInode + 1 + freeInodes-- + } + + // how many log groups per flex group? Depends on if we have flex groups + logGroupsPerFlex := 0 + if fflags.flexBlockGroups { + logGroupsPerFlex = defaultLogGroupsPerFlex + if p.LogFlexBlockGroups > 0 { + logGroupsPerFlex = p.LogFlexBlockGroups + } + } + + // create the superblock - MUST ADD IN OPTIONS + now, epoch := time.Now(), time.Unix(0, 0) + sb := superblock{ + inodeCount: inodeCount, + blockCount: uint64(numblocks), + reservedBlocks: uint64(reservedBlocksPercent) / 100 * uint64(numblocks), + freeBlocks: uint64(freeBlocks), + freeInodes: freeInodes, + firstDataBlock: firstDataBlock, + blockSize: blocksize, + clusterSize: uint64(clusterSize), + blocksPerGroup: blocksPerGroup, + clustersPerGroup: clustersPerGroup, + inodesPerGroup: uint32(inodesPerGroup), + mountTime: now, + writeTime: now, + mountCount: 0, + mountsToFsck: 0, + filesystemState: fsStateCleanlyUnmounted, + errorBehaviour: errorsContinue, + minorRevision: 0, + lastCheck: now, + checkInterval: 0, + creatorOS: osLinux, + revisionLevel: 1, + reservedBlocksDefaultUID: 0, + reservedBlocksDefaultGID: 0, + firstNonReservedInode: firstNonReservedInode, + inodeSize: uint16(DefaultInodeSize), + blockGroup: 0, + features: fflags, + uuid: fsuuid, + volumeLabel: volumeName, + lastMountedDirectory: "/", + 
algorithmUsageBitmap: 0, // not used in Linux e2fsprogs + preallocationBlocks: 0, // not used in Linux e2fsprogs + preallocationDirectoryBlocks: 0, // not used in Linux e2fsprogs + reservedGDTBlocks: uint16(reservedGDTBlocks), + journalSuperblockUUID: &journalSuperblockUUID, + journalInode: journalInode, + journalDeviceNumber: journalDeviceNumber, + orphanedInodesStart: 0, + hashTreeSeed: htreeSeed, + hashVersion: hashHalfMD4, + groupDescriptorSize: gdSize, + defaultMountOptions: *mountOptions, + firstMetablockGroup: firstMetaBG, + mkfsTime: now, + journalBackup: nil, + // 64-bit mode features + inodeMinBytes: minInodeExtraSize, + inodeReserveBytes: wantInodeExtraSize, + miscFlags: mflags, + raidStride: 0, + multiMountPreventionInterval: 0, + multiMountProtectionBlock: 0, + raidStripeWidth: 0, + checksumType: checksumType, + totalKBWritten: uint64(initialKB), + errorCount: 0, + errorFirstTime: epoch, + errorFirstInode: 0, + errorFirstBlock: 0, + errorFirstFunction: "", + errorFirstLine: 0, + errorLastTime: epoch, + errorLastInode: 0, + errorLastLine: 0, + errorLastBlock: 0, + errorLastFunction: "", + mountOptions: "", // no mount options until it is mounted + backupSuperblockBlockGroups: backupSuperblockGroupsSparse, + lostFoundInode: lostFoundInode, + overheadBlocks: 0, + checksumSeed: crc.CRC32c(0, fsuuid.Bytes()), // according to docs, this should be crc32c(~0, $orig_fs_uuid) + snapshotInodeNumber: 0, + snapshotID: 0, + snapshotReservedBlocks: 0, + snapshotStartInode: 0, + userQuotaInode: userQuotaInode, + groupQuotaInode: groupQuotaInode, + projectQuotaInode: projectQuotaInode, + logGroupsPerFlex: uint64(logGroupsPerFlex), + } + gdt := groupDescriptors{} + + b, err := sb.toBytes() + if err != nil { + return nil, fmt.Errorf("error converting Superblock to bytes: %v", err) + } + + g := gdt.toBytes(gdtChecksumType, sb.checksumSeed) + // how big should the GDT be? 
+ gdSize = groupDescriptorSize + if sb.features.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + gdtSize := int64(gdSize) * numblocks + // write the superblock and GDT to the various locations on disk + for _, bg := range backupSuperblocks { + block := bg * int64(blocksPerGroup) + blockStart := block * int64(blocksize) + // allow that the first one requires an offset + incr := int64(0) + if block == 0 { + incr = int64(SectorSize512) * 2 + } + + // write the superblock + count, err := f.WriteAt(b, incr+blockStart+start) + if err != nil { + return nil, fmt.Errorf("error writing Superblock for block %d to disk: %v", block, err) + } + if count != int(SuperblockSize) { + return nil, fmt.Errorf("wrote %d bytes of Superblock for block %d to disk instead of expected %d", count, block, SuperblockSize) + } + + // write the GDT + count, err = f.WriteAt(g, incr+blockStart+int64(SuperblockSize)+start) + if err != nil { + return nil, fmt.Errorf("error writing GDT for block %d to disk: %v", block, err) + } + if count != int(gdtSize) { + return nil, fmt.Errorf("wrote %d bytes of GDT for block %d to disk instead of expected %d", count, block, gdtSize) + } + } + + // create root directory + // there is nothing in there + return &FileSystem{ + bootSector: []byte{}, + superblock: &sb, + groupDescriptors: &gdt, + blockGroups: blockGroups, + size: size, + start: start, + file: f, + }, nil +} + +// Read reads a filesystem from a given disk. +// +// requires the util.File where to read the filesystem, size is the size of the filesystem in bytes, +// start is how far in bytes from the beginning of the util.File the filesystem is expected to begin, +// and blocksize is is the logical blocksize to use for creating the filesystem +// +// note that you are *not* required to read a filesystem on the entire disk. You could have a disk of size +// 20GB, and a small filesystem of size 50MB that begins 2GB into the disk. 
+// This is extremely useful for working with filesystems on disk partitions. +// +// Note, however, that it is much easier to do this using the higher-level APIs at github.com/diskfs/go-diskfs +// which allow you to work directly with partitions, rather than having to calculate (and hopefully not make any errors) +// where a partition starts and ends. +// +// If the provided blocksize is 0, it will use the default of 512 bytes. If it is any number other than 0 +// or 512, it will return an error. +func Read(file util.File, size, start, sectorsize int64) (*FileSystem, error) { + // blocksize must be <=0 or exactly SectorSize512 or error + if sectorsize != int64(SectorSize512) && sectorsize > 0 { + return nil, fmt.Errorf("sectorsize for ext4 must be either 512 bytes or 0, not %d", sectorsize) + } + // we do not check for ext4 max size because it is theoreticallt 1YB, which is bigger than an int64! Even 1ZB is! + if size < Ext4MinSize { + return nil, fmt.Errorf("requested size is smaller than minimum allowed ext4 size %d", Ext4MinSize) + } + + // load the information from the disk + // read boot sector code + bs := make([]byte, BootSectorSize) + n, err := file.ReadAt(bs, start) + if err != nil { + return nil, fmt.Errorf("could not read boot sector bytes from file: %v", err) + } + if uint16(n) < uint16(BootSectorSize) { + return nil, fmt.Errorf("only could read %d boot sector bytes from file", n) + } + + // read the superblock + // the superblock is one minimal block, i.e. 
2 sectors + superblockBytes := make([]byte, SuperblockSize) + n, err = file.ReadAt(superblockBytes, start+int64(BootSectorSize)) + if err != nil { + return nil, fmt.Errorf("could not read superblock bytes from file: %v", err) + } + if uint16(n) < uint16(SuperblockSize) { + return nil, fmt.Errorf("only could read %d superblock bytes from file", n) + } + + // convert the bytes into a superblock structure + sb, err := superblockFromBytes(superblockBytes) + if err != nil { + return nil, fmt.Errorf("could not interpret superblock data: %v", err) + } + + // now read the GDT + // how big should the GDT be? + gdtSize := uint64(sb.groupDescriptorSize) * sb.blockGroupCount() + + gdtBytes := make([]byte, gdtSize) + n, err = file.ReadAt(gdtBytes, start+int64(BootSectorSize)+int64(SuperblockSize)) + if err != nil { + return nil, fmt.Errorf("could not read Group Descriptor Table bytes from file: %v", err) + } + if uint64(n) < gdtSize { + return nil, fmt.Errorf("only could read %d Group Descriptor Table bytes from file instead of %d", n, gdtSize) + } + gdt, err := groupDescriptorsFromBytes(gdtBytes, sb.groupDescriptorSize, sb.checksumSeed, sb.gdtChecksumType()) + if err != nil { + return nil, fmt.Errorf("could not interpret Group Descriptor Table data: %v", err) + } + + return &FileSystem{ + bootSector: bs, + superblock: sb, + groupDescriptors: gdt, + blockGroups: int64(sb.blockGroupCount()), + size: size, + start: start, + file: file, + }, nil +} + +// Type returns the type code for the filesystem. Always returns filesystem.TypeExt4 +func (fs *FileSystem) Type() filesystem.Type { + return filesystem.TypeExt4 +} + +// Mkdir make a directory at the given path. It is equivalent to `mkdir -p`, i.e. 
idempotent, in that: +// +// * It will make the entire tree path if it does not exist +// * It will not return an error if the path already exists +func (fs *FileSystem) Mkdir(p string) error { + _, _, err := fs.readDirWithMkdir(p, true) + // we are not interesting in returning the entries + return err +} + +// ReadDir return the contents of a given directory in a given filesystem. +// +// Returns a slice of os.FileInfo with all of the entries in the directory. +// +// Will return an error if the directory does not exist or is a regular file and not a directory +func (fs *FileSystem) ReadDir(p string) ([]os.FileInfo, error) { + _, entries, err := fs.readDirWithMkdir(p, false) + if err != nil { + return nil, fmt.Errorf("error reading directory %s: %v", p, err) + } + // once we have made it here, looping is done. We have found the final entry + // we need to return all of the file info + count := len(entries) + ret := make([]os.FileInfo, count) + for i, e := range entries { + in, err := fs.readInode(e.inode) + if err != nil { + return nil, fmt.Errorf("could not read inode %d at position %d in directory: %v", e.inode, i, err) + } + ret[i] = FileInfo{ + modTime: in.modifyTime, + name: e.filename, + size: int64(in.size), + isDir: e.fileType == dirFileTypeDirectory, + } + } + + return ret, nil +} + +// OpenFile returns an io.ReadWriter from which you can read the contents of a file +// or write contents to the file +// +// accepts normal os.OpenFile flags +// +// returns an error if the file does not exist +func (fs *FileSystem) OpenFile(p string, flag int) (filesystem.File, error) { + // get the path + dir := path.Dir(p) + filename := path.Base(p) + // if the dir == filename, then it is just / + if dir == filename { + return nil, fmt.Errorf("cannot open directory %s as file", p) + } + // get the directory entries + parentDir, entries, err := fs.readDirWithMkdir(dir, false) + if err != nil { + return nil, fmt.Errorf("could not read directory entries for %s", dir) + } + 
// we now know that the directory exists, see if the file exists + var targetEntry *directoryEntry + for _, e := range entries { + if e.filename != filename { + continue + } + // cannot do anything with directories + if e.fileType == dirFileTypeDirectory { + return nil, fmt.Errorf("cannot open directory %s as file", p) + } + // if we got this far, we have found the file + targetEntry = e + break + } + + // see if the file exists + // if the file does not exist, and is not opened for os.O_CREATE, return an error + if targetEntry == nil { + if flag&os.O_CREATE == 0 { + return nil, fmt.Errorf("target file %s does not exist and was not asked to create", p) + } + // else create it + targetEntry, err = fs.mkFile(parentDir, filename) + if err != nil { + return nil, fmt.Errorf("failed to create file %s: %v", p, err) + } + } + // get the inode + inodeNumber := targetEntry.inode + inode, err := fs.readInode(inodeNumber) + if err != nil { + return nil, fmt.Errorf("could not read inode number %d: %v", inodeNumber, err) + } + + // if a symlink, read the target, rather than the inode itself, which does not point to anything + if inode.fileType == fileTypeSymbolicLink { + // is the symlink relative or absolute? + linkTarget := inode.linkTarget + if !path.IsAbs(linkTarget) { + // convert it into an absolute path + // and start the process again + linkTarget = path.Join(dir, linkTarget) + // we probably could make this more efficient by checking if the final linkTarget + // is in the same directory as we already are parsing, rather than walking the whole thing again + // leave that for the future. 
+ linkTarget = path.Clean(linkTarget) + } + return fs.OpenFile(linkTarget, flag) + } + offset := int64(0) + if flag&os.O_APPEND == os.O_APPEND { + offset = int64(inode.size) + } + // when we open a file, we load the inode but also all of the extents + extents, err := inode.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("could not read extent tree for inode %d: %v", inodeNumber, err) + } + return &File{ + directoryEntry: targetEntry, + inode: inode, + isReadWrite: flag&os.O_RDWR != 0, + isAppend: flag&os.O_APPEND != 0, + offset: offset, + filesystem: fs, + extents: extents, + }, nil +} + +// Label read the volume label +func (fs *FileSystem) Label() string { + if fs.superblock == nil { + return "" + } + return fs.superblock.volumeLabel +} + +// SetLabel changes the label on the writable filesystem. Different file system may hav different +// length constraints. +// +//nolint:revive // will use params when read-write +func (fs *FileSystem) SetLabel(label string) error { + return errors.New("cannot set label, filesystem currently read-only") +} + +// readInode read a single inode from disk +func (fs *FileSystem) readInode(inodeNumber uint32) (*inode, error) { + if inodeNumber == 0 { + return nil, fmt.Errorf("cannot read inode 0") + } + sb := fs.superblock + inodeSize := sb.inodeSize + inodesPerGroup := sb.inodesPerGroup + // figure out which block group the inode is on + bg := (inodeNumber - 1) / inodesPerGroup + // read the group descriptor to find out the location of the inode table + gd := fs.groupDescriptors.descriptors[bg] + inodeTableBlock := gd.inodeTableLocation + inodeBytes := make([]byte, inodeSize) + // bytesStart is beginning byte for the inodeTableBlock + byteStart := inodeTableBlock * uint64(sb.blockSize) + // offsetInode is how many inodes in our inode is + offsetInode := (inodeNumber - 1) % inodesPerGroup + // offset is how many bytes in our inode is + offset := offsetInode * uint32(inodeSize) + read, err := fs.file.ReadAt(inodeBytes, 
int64(byteStart)+int64(offset)) + if err != nil { + return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %v", inodeNumber, offset, inodeTableBlock, bg, err) + } + if read != int(inodeSize) { + return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize) + } + inode, err := inodeFromBytes(inodeBytes, sb, inodeNumber) + if err != nil { + return nil, fmt.Errorf("could not interpret inode data: %v", err) + } + // fill in symlink target if needed + if inode.fileType == fileTypeSymbolicLink && inode.linkTarget == "" { + // read the symlink target + extents, err := inode.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("could not read extent tree for symlink inode %d: %v", inodeNumber, err) + } + b, err := fs.readFileBytes(extents) + if err != nil { + return nil, fmt.Errorf("could not read symlink target for inode %d: %v", inodeNumber, err) + } + inode.linkTarget = string(b) + } + return inode, nil +} + +// writeInode write a single inode to disk +func (fs *FileSystem) writeInode(i *inode) error { + sb := fs.superblock + inodeSize := sb.inodeSize + inodesPerGroup := sb.inodesPerGroup + // figure out which block group the inode is on + bg := (i.number - 1) / inodesPerGroup + // read the group descriptor to find out the location of the inode table + gd := fs.groupDescriptors.descriptors[bg] + inodeTableBlock := gd.inodeTableLocation + // bytesStart is beginning byte for the inodeTableBlock + // byteStart := inodeTableBlock * sb.blockSize + // offsetInode is how many inodes in our inode is + offsetInode := (i.number - 1) % inodesPerGroup + // offset is how many bytes in our inode is + offset := int64(offsetInode) * int64(inodeSize) + inodeBytes := i.toBytes(sb) + wrote, err := fs.file.WriteAt(inodeBytes, offset) + if err != nil { + return fmt.Errorf("failed to write inode %d at offset %d of block %d from block group %d: %v", i.number, offset, inodeTableBlock, bg, err) + 
} + if wrote != int(inodeSize) { + return fmt.Errorf("wrote %d bytes for inode %d instead of inode size of %d", wrote, i.number, inodeSize) + } + return nil +} + +// read directory entries for a given directory +func (fs *FileSystem) readDirectory(inodeNumber uint32) ([]*directoryEntry, error) { + // read the inode for the directory + in, err := fs.readInode(inodeNumber) + if err != nil { + return nil, fmt.Errorf("could not read inode %d for directory: %v", inodeNumber, err) + } + // convert the extent tree into a sorted list of extents + extents, err := in.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("unable to get blocks for inode %d: %w", in.number, err) + } + // read the contents of the file across all blocks + b, err := fs.readFileBytes(extents) + if err != nil { + return nil, fmt.Errorf("error reading file bytes for inode %d: %v", inodeNumber, err) + } + + var dirEntries []*directoryEntry + // TODO: none of this works for hashed dir entries, indicated by in.flags.hashedDirectoryIndexes == true + if in.flags.hashedDirectoryIndexes { + treeRoot, err := parseDirectoryTreeRoot(b[:fs.superblock.blockSize], fs.superblock.features.largeDirectory) + if err != nil { + return nil, fmt.Errorf("failed to parse directory tree root: %v", err) + } + subDirEntries, err := parseDirEntriesHashed(b, treeRoot.depth, treeRoot, fs.superblock.blockSize, fs.superblock.features.metadataChecksums, in.number, in.nfsFileVersion, fs.superblock.checksumSeed) + if err != nil { + return nil, fmt.Errorf("failed to parse hashed directory entries: %v", err) + } + // include the dot and dotdot entries from treeRoot; they do not show up in the hashed entries + dirEntries = []*directoryEntry{treeRoot.dotEntry, treeRoot.dotDotEntry} + dirEntries = append(dirEntries, subDirEntries...) 
+ } else { + // convert into directory entries + dirEntries, err = parseDirEntriesLinear(b, fs.superblock.features.metadataChecksums, fs.superblock.blockSize, in.number, in.nfsFileVersion, fs.superblock.checksumSeed) + } + + return dirEntries, err +} + +// readFileBytes read all of the bytes for an individual file pointed at by a given inode +// normally not very useful, but helpful when reading an entire directory. +func (fs *FileSystem) readFileBytes(extents extents) ([]byte, error) { + // walk through each one, gobbling up the bytes + b := make([]byte, 0, fs.superblock.blockSize) + for i, e := range extents { + start := e.startingBlock * uint64(fs.superblock.blockSize) + count := uint64(e.count) * uint64(fs.superblock.blockSize) + b2 := make([]byte, count) + read, err := fs.file.ReadAt(b2, int64(start)) + if err != nil { + return nil, fmt.Errorf("failed to read bytes for extent %d: %v", i, err) + } + if read != int(count) { + return nil, fmt.Errorf("read %d bytes instead of %d for extent %d", read, count, i) + } + b = append(b, b2...) 
+ } + return b, nil +} + +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) writeDirectoryEntries(dir *Directory) error { + return errors.New("unsupported write directory entries, currently read-only") +} + +// make a file +// +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) mkFile(parent *Directory, name string) (*directoryEntry, error) { + return nil, errors.New("unsupported to create a file, currently read-only") +} + +// readDirWithMkdir - walks down a directory tree to the last entry +// if it does not exist, it may or may not make it +func (fs *FileSystem) readDirWithMkdir(p string, doMake bool) (*Directory, []*directoryEntry, error) { + paths := splitPath(p) + + // walk down the directory tree until all paths have been walked or we cannot find something + // start with the root directory + var entries []*directoryEntry + currentDir := &Directory{ + directoryEntry: directoryEntry{ + inode: rootInode, + filename: "", + fileType: dirFileTypeDirectory, + }, + } + entries, err := fs.readDirectory(rootInode) + if err != nil { + return nil, nil, fmt.Errorf("failed to read directory %s", "/") + } + for i, subp := range paths { + // do we have an entry whose name is the same as this name? 
+ found := false + for _, e := range entries { + if e.filename != subp { + continue + } + if e.fileType != dirFileTypeDirectory { + return nil, nil, fmt.Errorf("cannot create directory at %s since it is a file", "/"+strings.Join(paths[0:i+1], "/")) + } + // the filename matches, and it is a subdirectory, so we can break after saving the directory entry, which contains the inode + found = true + currentDir = &Directory{ + directoryEntry: *e, + } + break + } + + // if not, either make it, retrieve its cluster and entries, and loop; + // or error out + if !found { + if doMake { + var subdirEntry *directoryEntry + subdirEntry, err = fs.mkSubdir(currentDir, subp) + if err != nil { + return nil, nil, fmt.Errorf("failed to create subdirectory %s", "/"+strings.Join(paths[0:i+1], "/")) + } + // write the directory entries to disk + err = fs.writeDirectoryEntries(currentDir) + if err != nil { + return nil, nil, fmt.Errorf("error writing directory entries to disk: %v", err) + } + // save where we are to search next + currentDir = &Directory{ + directoryEntry: *subdirEntry, + } + } else { + return nil, nil, fmt.Errorf("path %s not found", "/"+strings.Join(paths[0:i+1], "/")) + } + } + // get all of the entries in this directory + entries, err = fs.readDirectory(currentDir.inode) + if err != nil { + return nil, nil, fmt.Errorf("failed to read directory %s", "/"+strings.Join(paths[0:i+1], "/")) + } + } + // once we have made it here, looping is done; we have found the final entry + return currentDir, entries, nil +} + +// readBlock read a single block from disk +func (fs *FileSystem) readBlock(blockNumber uint64) ([]byte, error) { + sb := fs.superblock + // bytesStart is beginning byte for the inodeTableBlock + byteStart := blockNumber * uint64(sb.blockSize) + blockBytes := make([]byte, sb.blockSize) + read, err := fs.file.ReadAt(blockBytes, int64(byteStart)) + if err != nil { + return nil, fmt.Errorf("failed to read block %d: %v", blockNumber, err) + } + if read != 
int(sb.blockSize) { + return nil, fmt.Errorf("read %d bytes for block %d instead of size of %d", read, blockNumber, sb.blockSize) + } + return blockBytes, nil +} + +// recalculate blocksize based on the existing number of blocks +// - 0 <= blocks < 3MM : floppy - blocksize = 1024 +// - 3MM <= blocks < 512MM : small - blocksize = 1024 +// - 512MM <= blocks < 4*1024*1024MM : default - blocksize = +// - 4*1024*1024MM <= blocks < 16*1024*1024MM : big - blocksize = +// - 16*1024*1024MM <= blocks : huge - blocksize = +// +// the original code from e2fsprogs https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/misc/mke2fs.c +func recalculateBlocksize(numblocks, size int64) (sectorsPerBlock int, blocksize uint32, numBlocksAdjusted int64) { + var ( + million64 = int64(million) + sectorSize512 = uint32(SectorSize512) + ) + switch { + case 0 <= numblocks && numblocks < 3*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 3*million64 <= numblocks && numblocks < 512*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 512*million64 <= numblocks && numblocks < 4*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 4*1024*1024*million64 <= numblocks && numblocks < 16*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case numblocks > 16*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + } + return sectorsPerBlock, blocksize, size / int64(blocksize) +} + +// mkSubdir make a subdirectory of a given name inside the parent +// +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) mkSubdir(parent *Directory, name string) (*directoryEntry, error) { + return nil, errors.New("mksubdir not yet supported") +} diff --git a/filesystem/ext4/ext4.md b/filesystem/ext4/ext4.md new file mode 100644 index 00000000..05f62291 --- /dev/null +++ b/filesystem/ext4/ext4.md @@ -0,0 +1,290 @@ +# ext4 
+This file describes the layout on disk of ext4. It is a living document and probably will be deleted rather than committed to git. + +The primary reference document is [here](https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Overview). + +Also useful are: + +* https://blogs.oracle.com/linux/post/understanding-ext4-disk-layout-part-2 +* https://www.sans.org/blog/understanding-ext4-part-6-directories/ - blog series +* https://digital-forensics.sans.org/blog/2017/06/07/understanding-ext4-part-6-directories +* https://metebalci.com/blog/a-minimum-complete-tutorial-of-linux-ext4-file-system/ + +## Concepts + +* Sector: a section of 512 bytes +* Block: a contiguous group of sectors. Block size usually is either 4K (4096 bytes) or 1K (1024 bytes), i.e. 8 sectors or 2 sectors. Block size minimum is 1KB (2 sectors), max is 64KB (128 sectors). Each block is associated with exactly one file. A file may contain more than one block - e.g. if a file is larger than the size of a single block - but each block belongs to exactly one file. +* inode: metadata about a file or directory. Each inode contains metadata about exactly one file. The number of inodes in a system is identical to the number of blocks for 32-bit, or far fewer for 64-bit. +* Block group: a contiguous group of blocks. Each block group is (`8*block_size_in_bytes`) blocks. So if block size is 4K, or 4096 bytes, then a block group is `8*4096` = 32,768 blocks, each of size 4096 bytes, for a block group of 128MB. If block size is 1K, a block group is 8192 blocks, or 8MB. +* 64-bit feature: ext4 filesystems normally uses 32-bit, which means the maximum blocks per filesystem is 2^32. If the 64-bit feature is enabled, then the maximum blocks per filesystem is 2^64. +* Superblock: A block that contains information about the entire filesystem. Exists in block group 0 and sometimes is backed up to other block groups. 
The superblock contains information about the filesystem as a whole: inode size, block size, last mount time, etc. +* Block Group Descriptor: Block Group Descriptors contain information about each block group: start block, end block, inodes, etc. One Descriptor per Group. But it is stored next to the Superblock (and backups), not with each Group. +* Extent: an extent is a contiguous group of blocks. Extents are used to store files. Extents are mapped beginning with the inode, and provide the way of getting from an inode to the blocks that contain the file's data. + + +### Block Group + +Each block group is built in the following order. There is a distinction between Group 0 - the first one +in the filesystem - and all others. + +Block groups come in one of several types. It isn't necessary to list all of them here. The key elements are as follows. + +Block 0: + +1. Padding: 1024 bytes, used for boot sector + +Block 0 (above 1024 bytes, if blocksize >1024) or Block 1; all backup blocks: + +2. Superblock: One block +3. Group Descriptors: Many blocks +4. Reserved GDT Blocks: Many blocks, reserved in case we need to expand to more Group Descriptors in the future + +All blocks: + +5. Data block bitmap: 1 block. One bit per block in the block group. Set to 1 if a data block is in use, 0 if not. +6. inode bitmap: 1 block. One bit per inode in the block group. Set to 1 if an inode is in use, 0 if not. +7. inode table: many blocks. Calculated by `(inodes_per_group)*(size_of_inode)`. Remember that `inodes_per_group` = `blocks_per_group` = `8*block_size_in_bytes`. The original `size_of_inode` in ext2 was 128 bytes. In ext4 it uses 156 bytes, but is stored in 256 bytes of space, so `inode_size_in_bytes` = 256 bytes. +8. Data blocks: all of the rest of the blocks in the block group + +The variant on the above is with Flexible Block Groups. If flexbg is enabled, then block groups are grouped together, normally +groups of 16 (but the actual number is in the superblock). 
The data block bitmap, inode bitmap and inode table are +in the first block group for each flexible block group. + +This means you can have all sorts of combinations: + +* block that is both first in a block group (contains block bitmap, inode bitmap, inode table) and superblock/backup (contains superblock, GDT, reserved GDT blocks) +* block that is first in a block group (block bitmap, inode bitmap, inode table) but not superblock/backup +* block that is superblock/backup (superblock, GDT, reserved GDT blocks) but not first in a block group or Flex BG +* neither of the above (contains just data blocks) + +Summary: block bitmap, inode bitmap and inode table are in the first block in a blockgroup or Flex BG, which is a consistent +number. Superblock backups are in specific blocks, calculated by being in a block group whose number is a power of 3, 5 or 7. + +## How to + +Different actions. These all will be replaced by actual code. Things we need to be able to do: + +* walk the tree to a particular directory or file +* inode to data blocks +* read directory entries +* create a new directory entry +* read contents of a file +* write contents to a file + +### Walk the Tree + +In order to get to any particular file or directory in the ext4 filesystem, you need to "walk the tree". +For example, say you want to read the contents of directory `/usr/local/bin/`. + +1. Find the inode of the root directory in the inode table. This **always** is inode 2. +1. Read inode of the root directory to get the data blocks that contain the contents of the root directory. See [inode to data blocks](#inode-to-data-blocks). +1. Read the directory entries in the data blocks to get the names of the files and directories in root. This can be linear or hash. + * linear: read sequentially until you find the one whose name matches the desired subdirectory, for example `usr` + * hash: hash the name and use that to get the correct location +1.
Using the matched directory entry, get the inode number for that subdirectory. +1. Use the superblock to read how many inodes are in each block group, e.g. 8144 +1. Calculate which block group contains the inode you are looking for. Using the above example, 0-8143 are in group 0, 8144-16287 are in group 1, etc. +1. Read the inode of that subdirectory in the inode table of the given block group to get the data blocks that contain the contents of that directory. +1. Repeat until you have read the data blocks for the desired entry. + +### Inode to Data Blocks + +Start with the inode + +1. Read the inode +1. Read the `i_block` value, 60 bytes at location 0x28 (= 40) +1. The first 12 bytes are an extent header: + * magic number 0xf30a (little endian) - 2 bytes + * number of entries following the header - 2 bytes - in the inode, always 1, 2, 3, or 4 + * maximum number of entries that could follow the header - 2 bytes - in the inode, always 4 + * depth of this node in the extent tree, where 0 = leaf, parent to that is 1, etc. - 2 bytes + * generation (unused) - 4 bytes +1. Read the entries that follow. + +If the data inside the inode is a leaf node (header depth = 0), then the entries will be leaf entries of 12 bytes: + +* first block in the file that this extent covers - 4 bytes +* number of blocks in this extent - 2 bytes - If the value of this field is <= 32768, the extent is initialized. If the value of the field is > 32768, the extent is uninitialized and the actual extent length is ee_len - 32768. Therefore, the maximum length of a initialized extent is 32768 blocks, and the maximum length of an uninitialized extent is 32767. +* upper 16 bits of the block location - 2 bytes +* lower 32 bits of the block location - 4 bytes + +For example, if a file has 1,000 blocks, and a particular extent entry points to blocks 100-299 of the file, and it starts +at filesystem block 10000, then the entry will be: + +* 100 (4 bytes) +* 200 (2 bytes) - is this correct? 
Yes - 200 is well below 32768, so this extent is initialized and simply covers 200 blocks
+* 0 (2 bytes)
+* 10000 (4 bytes)
+
+If the data inside the inode is an internal node (header depth > 0), then the entries will be internal entries of 12 bytes:
+
+* first file block that this extent and all its children cover - 4 bytes
+* lower 32 bits of the block number of the extent node on the next lower level - 4 bytes
+* upper 16 bits of the block number of the extent node on the next lower level - 2 bytes
+* unused - 2 bytes
+
+For example, if a file has 10,000 blocks, covered in 15 extents, then there will be 15 level 0 extents, and 1 level 1 extent,
+and the 15 extents are stored in filesystem block 20000.
+
+The lower level 0 extent will look like our leaf node example above.
+The upper level 1 extent will look like:
+
+* 0 (4 bytes) - because this starts from file block 0
+* 20000 (4 bytes) - the block number of the extent node on the next lower level
+* 0 (2 bytes) - the upper 16 bits are zero, because the lower 32 bits were enough to hold the block number
+* 0 (2 bytes) - unused
+
+You can find all of the blocks simply by looking at the root of the extent tree in the inode.
+
+* If the extents for the file are 4 or fewer, then the extent tree is stored in the inode itself.
+* If the extents for the file are more than 4, but enough to fit the extents in 1-4 blocks, then:
+    * level 0 extents are stored in a single separate block
+    * level 1 extents are stored in the inode, with up to 4 entries pointing to the level 0 extents blocks
+* If the extents for the file are more than fit in 4 blocks, then:
+    * level 0 extents are stored in as many blocks as needed
+    * level 1 extents are stored in other blocks pointing to level 0 extent blocks
+    * level 2 extents - up to 4 - are stored in the inode
+
+Each of these is repeated upwards. The maximum at the top of the tree is 4, the maximum in each block is `(blocksize-12)/12`.
+Because:
+
+- each block of extent nodes needs a header of 12 bytes
+- each extent node is 12 bytes
+
+### Read Directory Entries
+To read directory entries
+
+1. 
Walk the tree until you find the inode for the directory you want. +2. Read the data blocks pointed to by that inode, see [inode to data blocks](#inode-to-data-blocks). +3. Interpret the data blocks. + +The directory itself is just a single "file". It has an inode that indicates the file "length", which is the number of bytes that the listing takes up. + +There are two types of directories: Classic and Hash Tree. Classic are just linear, unsorted, unordered lists of files. They work fine for shorter lists, but large directories can be slow to traverse if they grow too large. Once the contents of the directory "file" will be larger than a single block, ext4 switches it to a Hash Tree Directory Entry. + +Which directory type it is - classical linear or hash tree - does not affect the inode, for which it is just a file, but the contents of the directory entry "file". You can tell if it is linear or hash tree by checking the inode flag `EXT4_INDEX_FL`. If it is set (i.e. `& 0x1000`), then it is a hash tree. + +#### Classic Directory Entry +Each directory entry is at most 263 bytes long. They are arranged in sequential order in the file. The contents are: + +* first four bytes are a `uint32` giving the inode number +* next 2 bytes give the length of the directory entry (max 263) +* next 1 byte gives the length of the file name (which could be calculated from the directory entry length...) +* next 1 byte gives type: unknown, file, directory, char device, block device, FIFO, socket, symlink +* next (up to 255) bytes contain chars with the file or directory name + +The above is for the second version of ext4 directory entry (`ext4_dir_entry_2`). The slightly older version (`ext4_dir_entry`) is similar, except it does not give the file type, which in any case is in the inode. Instead it uses 2 bytes for the file name length. 
+ +#### Hash Tree Directory Entry +Entries in the block are structured as follows: + +* `.` and `..` are the first two entries, and are classic `ext4_dir_entry_2` +* Look in byte `0x1c` to find the hash algorithm +* take the desired file/subdirectory name (just the `basename`) and hash it, see [Calculating the hash value][Calculating the hash value] +* look in the root directory entry in the hashmap to find the relative block number. Note that the block number is relative to the block in the directory, not the filesystem or block group. +* Next step depends on the hash tree depth: + * Depth = 0: read directory entry from the given block. + * Depth > 0: use the block as another lookup table, repeating the steps above, until we come to the depth. +* Once we have the final leaf block given by the hash table, we just read the block sequentially; it will be full of classical directory entries linearly. + +When reading the hashmap, it may not match precisely. Instead, it will fit within a range. The hashmap is sorted by `>=` to `<`. So if the table has entries as follows: + +| Hash | Block | +| -------|-------| +| 0 | 1 | +| 100 | 25 | +| 300 | 16 | + +Then: + +* all hash values from `0`-`99` will be in block `1` +* all hash values from `100-299` will be in block `25` +* all hash values from `300` to infinite will be in block `16` + +##### Calculating the hash value + +The hashing uses one of several algorithms. Most commonly, it is Half MD4. + +MD4 gives a digest length of 128 bits = 16 bytes. + +The "half md4" algorithm is given by the transformation code +[here](https://elixir.bootlin.com/linux/v4.6/source/lib/halfmd4.c#L26). The result +of it is 4 bytes. Those 4 bytes are the input to the hash. + +### Create a Directory Entry + +To create a directory, you need to go through the following steps: + +1. "Walk the tree" to find the parent directory. E.g. if you are creating `/usr/local/foo`, then you need to walk the tree to get to the directory "file" for `/usr/local`. 
If the parent directory is just the root `/`, e.g. you are creating `/foo`, then you use the root directory, whose inode always is `2`. +2. Determine if the parent directory is classical linear or hash tree, by checking the flag `EXT4_INDEX_FL` in the parent directory's inode. + * if hash: + 1. find a block in the "directory" file with space to add a linear entry + 1. create and add the entry + 1. calculate the hash of the filename + 1. add the `hash:block_number` entry into the tree + 1. rebalance if needed + * if linear, create the entry: + * if adding one will not exceed the size for linear, write it and done + * if adding one will exceed the size for linear, convert to hash, then write it + +#### Hash Tree + +1. Calculate the hash of the new directory entry name +2. Determine which block in the parent directory "file" the new entry should live, based on the hash table. +3. Find the block. +4. Add a classical linear entry at the end of it. +5. Update the inode for the parent directory with the new file size. + +If there is no room at the end of the block, you need to rebalance the hash tree. See below. + +#### Classical Linear + +1. Find the last block in the parent directory "file" + * if there is no room for another entry, extend the file size by another block, and update the inode for the file with the block map +2. Add a classical linear directory entry at the end of it. +3. Update the inode for the parent directory with the new file size, if any. E.g. if the entry fit within padding, there is no change in size. + +If this entry will cause the directory "file" to extend beyond a single block, convert to a hash tree. See below. + +### Rebalance Hash Tree + +Rebalancing the hash tree is rebalancing a btree, where the keys are the hash values. +You only ever need to rebalance when you add or remove an entry. + +#### Adding an entry + +When adding an entry, you only ever need to rebalance the node to which you add it, and parents up to the root. + +1. 
Calculate the hash of the entry
+1. Determine the leaf node into which it should go
+1. If the leaf node has less than the maximum number of elements, add it and done
+1. If the leaf node has the maximum number of elements:
+    1. Add the new node in the right place
+    1. Find the median
+    1. Move the median up to the parent node
+    1. If necessary, rebalance the parent node
+
+#### Removing an entry
+
+When removing an entry, you only ever need to rebalance the node from which you remove it, and parents up to the root.
+
+1. Calculate the hash of the entry
+1. Determine the leaf node in which it exists
+1. If the leaf node has more than the minimum number of elements, remove it and done
+1. If the leaf node has exactly the minimum number of elements:
+    1. Remove the entry
+    1. Borrow an element from a sibling leaf, or merge with a sibling if borrowing is not possible
+    1. Update the separator key in the parent node
+    1. If necessary, rebalance the parent node
+
+### Convert Classical Linear to Hash Tree
+
+
+### Read File Contents
+
+1. Walk the tree until you find the inode for the file you want.
+1. Find the data blocks for that inode, see [inode to data blocks](#inode-to-data-blocks).
+1. Interpret the data blocks.
+ +### Create File + +### Write File Contents diff --git a/filesystem/ext4/ext4_test.go b/filesystem/ext4/ext4_test.go new file mode 100644 index 00000000..9f75245c --- /dev/null +++ b/filesystem/ext4/ext4_test.go @@ -0,0 +1,148 @@ +package ext4 + +import ( + "bytes" + "cmp" + "errors" + "fmt" + "io" + "os" + "slices" + "strings" + "testing" + + "github.com/go-test/deep" +) + +const ( + randomDataFile = "testdata/dist/random.dat" +) + +func TestReadDirectory(t *testing.T) { + // read the foo directory file, which was created from debugfs + fooDirEntries, err := testDirEntriesFromDebugFS(fooDirFile) + if err != nil { + t.Fatalf("Error reading foo directory entries from debugfs: %v", err) + } + + // read the root directory file, which was created from debugfs + rootDirEntries, err := testDirEntriesFromDebugFS(rootDirFile) + if err != nil { + t.Fatalf("Error reading root directory entries from debugfs: %v", err) + } + + tests := []struct { + name string + inode uint32 + entries []*directoryEntry + err error + }{ + {"invalid inode", 0, nil, errors.New("could not read inode")}, + {"root", 2, rootDirEntries, nil}, + {"foo dir", 13, fooDirEntries, nil}, + } + f, err := os.Open(imgFile) + if err != nil { + t.Fatalf("Error opening test image: %v", err) + } + defer f.Close() + fs, err := Read(f, 100*MB, 0, 512) + if err != nil { + t.Fatalf("Error reading filesystem: %v", err) + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + entries, err := fs.readDirectory(tt.inode) + switch { + case err != nil && tt.err == nil: + t.Fatalf("unexpected error reading directory: %v", err) + case err == nil && tt.err != nil: + t.Fatalf("expected error reading directory: %v", tt.err) + case err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error()): + t.Fatalf("mismatched error reading directory, expected '%v' got '%v'", tt.err, err) + default: + sortFunc := func(a, b *directoryEntry) int { + return cmp.Compare(a.filename, b.filename) + } + 
slices.SortFunc(entries, sortFunc) + slices.SortFunc(tt.entries, sortFunc) + if diff := deep.Equal(entries, tt.entries); diff != nil { + t.Errorf("directory entries mismatch: %v", diff) + } + } + }) + } +} + +func TestReadFile(t *testing.T) { + randomFileData, err := os.ReadFile(randomDataFile) + if err != nil { + t.Fatalf("Error opening random data file %s: %v", randomDataFile, err) + } + tests := []struct { + name string + path string + offset int64 + size int + readAll bool + expected []byte + err error + }{ + {"invalid path", "/do/not/exist/any/where", 0, 0, false, nil, errors.New("could not read directory entries")}, + {"large file", "/random.dat", 0, len(randomFileData), false, randomFileData, nil}, + {"offset in file", "/random.dat", 5000, 1000, false, randomFileData[5000:6000], nil}, + {"readall", "/random.dat", 0, 0, true, randomFileData, nil}, + {"hard link", "/hardlink.dat", 0, 0, true, randomFileData, nil}, + {"valid symlink", "/symlink.dat", 0, 0, true, randomFileData, nil}, + {"absolute symlink", "/absolutesymlink", 0, 0, true, randomFileData, nil}, + {"dead symlink", "/deadlink", 0, 0, true, nil, fmt.Errorf("target file %s does not exist", "/nonexistent")}, + {"dead long symlink", "/deadlonglink", 0, 0, true, nil, errors.New("could not read directory entries")}, + } + f, err := os.Open(imgFile) + if err != nil { + t.Fatalf("Error opening test image: %v", err) + } + defer f.Close() + fs, err := Read(f, 100*MB, 0, 512) + if err != nil { + t.Fatalf("Error reading filesystem: %v", err) + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fsFile, err := fs.OpenFile(tt.path, 0o600) + switch { + case err != nil && tt.err == nil: + t.Fatalf("unexpected error opening file: %v", err) + case err == nil && tt.err != nil: + t.Fatalf("expected error opening file: %v", tt.err) + case err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error()): + t.Fatalf("mismatched error opening file, expected '%v' got '%v'", tt.err, err) + 
case err == nil: + var b []byte + if tt.readAll { + tt.size = len(tt.expected) + b, err = io.ReadAll(fsFile) + if err != nil { + t.Fatalf("Error reading file: %v", err) + } + } else { + if _, err := fsFile.Seek(tt.offset, io.SeekStart); err != nil { + t.Fatalf("Error seeking file: %v", err) + } + b = make([]byte, tt.size) + var n int + n, err = fsFile.Read(b) + if n != len(b) { + t.Fatalf("short read, expected %d bytes got %d", len(b), n) + } + } + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("Error reading file: %v", err) + } + if !bytes.Equal(b, tt.expected) { + t.Errorf("file data mismatch") + } + } + }) + } +} diff --git a/filesystem/ext4/extent.go b/filesystem/ext4/extent.go new file mode 100644 index 00000000..33d8cff0 --- /dev/null +++ b/filesystem/ext4/extent.go @@ -0,0 +1,320 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" +) + +const ( + extentTreeHeaderLength int = 12 + extentTreeEntryLength int = 12 + extentHeaderSignature uint16 = 0xf30a + extentTreeMaxDepth int = 5 +) + +// extens a structure holding multiple extents +type extents []extent + +// extent a structure with information about a single contiguous run of blocks containing file data +type extent struct { + // fileBlock block number relative to the file. E.g. if the file is composed of 5 blocks, this could be 0-4 + fileBlock uint32 + // startingBlock the first block on disk that contains the data in this extent. E.g. 
if the file is made up of data from blocks 100-104 on the disk, this would be 100 + startingBlock uint64 + // count how many contiguous blocks are covered by this extent + count uint16 +} + +// equal if 2 extents are equal +// +//nolint:unused // useful function for future +func (e *extent) equal(a *extent) bool { + if (e == nil && a != nil) || (a == nil && e != nil) { + return false + } + if e == nil && a == nil { + return true + } + return *e == *a +} + +// blocks how many blocks are covered in the extents +// +//nolint:unused // useful function for future +func (e extents) blocks() uint64 { + var count uint64 + for _, ext := range e { + count += uint64(ext.count) + } + return count +} + +// extentBlockFinder provides a way of finding the blocks on disk that represent the block range of a given file. +// Arguments are the starting and ending blocks in the file. Returns a slice of blocks to read on disk. +// These blocks are in order. For example, if you ask to read file blocks starting at 20 for a count of 25, then you might +// get a single fileToBlocks{block: 100, count: 25} if the file is contiguous on disk. Or you might get +// fileToBlocks{block: 100, count: 10}, fileToBlocks{block: 200, count: 15} if the file is fragmented on disk. +// The slice should be read in order. 
+type extentBlockFinder interface { + // findBlocks find the actual blocks for a range in the file, given the start block in the file and how many blocks + findBlocks(start, count uint64, fs *FileSystem) ([]uint64, error) + // blocks get all of the blocks for a file, in sequential order, essentially unravels the tree into a slice of extents + blocks(fs *FileSystem) (extents, error) + // toBytes convert this extentBlockFinder to bytes to be stored in a block or inode + toBytes() []byte +} + +var ( + _ extentBlockFinder = &extentInternalNode{} + _ extentBlockFinder = &extentLeafNode{} +) + +// extentNodeHeader represents the header of an extent node +type extentNodeHeader struct { + depth uint16 // the depth of tree below here; for leaf nodes, will be 0 + entries uint16 // number of entries + max uint16 // maximum number of entries allowed at this level + blockSize uint32 // block size for this tree +} + +func (e extentNodeHeader) toBytes() []byte { + b := make([]byte, 12) + binary.LittleEndian.PutUint16(b[0:2], extentHeaderSignature) + binary.LittleEndian.PutUint16(b[2:4], e.entries) + binary.LittleEndian.PutUint16(b[4:6], e.max) + binary.LittleEndian.PutUint16(b[6:8], e.depth) + return b +} + +// extentChildPtr represents a child pointer in an internal node of extents +// the child could be a leaf node or another internal node. We only would know +// after parsing diskBlock to see its header. +type extentChildPtr struct { + fileBlock uint32 // extents or children of this cover from file block fileBlock onwards + count uint32 // how many blocks are covered by this extent + diskBlock uint64 // block number where the children live +} + +// extentLeafNode represents a leaf node of extents +// it includes the information in the header and the extents (leaf nodes). +// By definition, this is a leaf node, so depth=0 +type extentLeafNode struct { + extentNodeHeader + extents extents // the actual extents +} + +// findBlocks find the actual blocks for a range in the file. 
leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. +func (e extentLeafNode) findBlocks(start, count uint64, _ *FileSystem) ([]uint64, error) { + var ret []uint64 + + // before anything, figure out which file block is the start and end of the desired range + end := start + count - 1 + + // we are at the bottom of the tree, so we can just return the extents + for _, ext := range e.extents { + extentStart := uint64(ext.fileBlock) + extentEnd := uint64(ext.fileBlock + uint32(ext.count) - 1) + + // Check if the extent does not overlap with the given block range + if extentEnd < start || extentStart > end { + continue + } + + // Calculate the overlapping range + overlapStart := max(start, extentStart) + overlapEnd := min(end, extentEnd) + + // Calculate the starting disk block for the overlap + diskBlockStart := ext.startingBlock + (overlapStart - extentStart) + + // Append the corresponding disk blocks to the result + for i := uint64(0); i <= overlapEnd-overlapStart; i++ { + ret = append(ret, diskBlockStart+i) + } + } + return ret, nil +} + +// blocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. 
+func (e extentLeafNode) blocks(_ *FileSystem) (extents, error) { + return e.extents[:], nil +} + +// toBytes convert the node to raw bytes to be stored, either in a block or in an inode +func (e extentLeafNode) toBytes() []byte { + // 12 byte header, 12 bytes per child + b := make([]byte, 12+12*e.max) + copy(b[0:12], e.extentNodeHeader.toBytes()) + + for i, ext := range e.extents { + base := (i + 1) * 12 + binary.LittleEndian.PutUint32(b[base:base+4], ext.fileBlock) + binary.LittleEndian.PutUint16(b[base+4:base+6], ext.count) + diskBlock := make([]byte, 8) + binary.LittleEndian.PutUint64(diskBlock, ext.startingBlock) + copy(b[base+6:base+8], diskBlock[4:6]) + copy(b[base+8:base+12], diskBlock[0:4]) + } + return b +} + +// extentInternalNode represents an internal node in a tree of extents +// it includes the information in the header and the internal nodes +// By definition, this is an internal node, so depth>0 +type extentInternalNode struct { + extentNodeHeader + children []*extentChildPtr // the children +} + +// findBlocks find the actual blocks for a range in the file. internal nodes need to read the filesystem to +// get the child nodes, so the FileSystem reference is used. +func (e extentInternalNode) findBlocks(start, count uint64, fs *FileSystem) ([]uint64, error) { + var ret []uint64 + + // before anything, figure out which file block is the start and end of the desired range + end := start + count - 1 + + // we are not depth 0, so we have children extent tree nodes. Figure out which ranges we are in. + // the hard part here is that each child has start but not end or count. You only know it from reading the next one. 
+ // So if the one we are looking at is in the range, we get it from the children, and keep going + for _, child := range e.children { + extentStart := uint64(child.fileBlock) + extentEnd := uint64(child.fileBlock + child.count - 1) + + // Check if the extent does not overlap with the given block range + if extentEnd < start || extentStart > end { + continue + } + + // read the extent block from the disk + b, err := fs.readBlock(child.diskBlock) + if err != nil { + return nil, err + } + ebf, err := parseExtents(b, e.blockSize, uint32(extentStart), uint32(extentEnd)) + if err != nil { + return nil, err + } + blocks, err := ebf.findBlocks(extentStart, uint64(child.count), fs) + if err != nil { + return nil, err + } + if len(blocks) > 0 { + ret = append(ret, blocks...) + } + } + return ret, nil +} + +// blocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. +func (e extentInternalNode) blocks(fs *FileSystem) (extents, error) { + var ret extents + + // we are not depth 0, so we have children extent tree nodes. Walk the tree below us and find all of the blocks + for _, child := range e.children { + // read the extent block from the disk + b, err := fs.readBlock(child.diskBlock) + if err != nil { + return nil, err + } + ebf, err := parseExtents(b, e.blockSize, child.fileBlock, child.fileBlock+child.count-1) + if err != nil { + return nil, err + } + blocks, err := ebf.blocks(fs) + if err != nil { + return nil, err + } + if len(blocks) > 0 { + ret = append(ret, blocks...) 
+ } + } + return ret, nil +} + +// toBytes convert the node to raw bytes to be stored, either in a block or in an inode +func (e extentInternalNode) toBytes() []byte { + // 12 byte header, 12 bytes per child + b := make([]byte, 12+12*e.max) + copy(b[0:12], e.extentNodeHeader.toBytes()) + + for i, child := range e.children { + base := (i + 1) * 12 + binary.LittleEndian.PutUint32(b[base:base+4], child.fileBlock) + diskBlock := make([]byte, 8) + binary.LittleEndian.PutUint64(diskBlock, child.diskBlock) + copy(b[base+4:base+8], diskBlock[0:4]) + copy(b[base+8:base+10], diskBlock[4:6]) + } + return b +} + +// parseExtents takes bytes, parses them to find the actual extents or the next blocks down. +// It does not recurse down the tree, as we do not want to do that until we actually are ready +// to read those blocks. This is similar to how ext4 driver in the Linux kernel does it. +// totalBlocks is the total number of blocks covered in this given section of the extent tree. +func parseExtents(b []byte, blocksize, start, count uint32) (extentBlockFinder, error) { + var ret extentBlockFinder + // must have at least header and one entry + minLength := extentTreeHeaderLength + extentTreeEntryLength + if len(b) < minLength { + return nil, fmt.Errorf("cannot parse extent tree from %d bytes, minimum required %d", len(b), minLength) + } + // check magic signature + if binary.LittleEndian.Uint16(b[0:2]) != extentHeaderSignature { + return nil, fmt.Errorf("invalid extent tree signature: %x", b[0x0:0x2]) + } + e := extentNodeHeader{ + entries: binary.LittleEndian.Uint16(b[0x2:0x4]), + max: binary.LittleEndian.Uint16(b[0x4:0x6]), + depth: binary.LittleEndian.Uint16(b[0x6:0x8]), + blockSize: blocksize, + } + // b[0x8:0xc] is used for the generation by Lustre but not standard ext4, so we ignore + + // we have parsed the header, now read either the leaf entries or the intermediate nodes + switch e.depth { + case 0: + var leafNode extentLeafNode + // read the leaves + for i := 0; i < 
int(e.entries); i++ { + start := i*extentTreeEntryLength + extentTreeHeaderLength + diskBlock := make([]byte, 8) + copy(diskBlock[0:4], b[start+8:start+12]) + copy(diskBlock[4:6], b[start+6:start+8]) + leafNode.extents = append(leafNode.extents, extent{ + fileBlock: binary.LittleEndian.Uint32(b[start : start+4]), + count: binary.LittleEndian.Uint16(b[start+4 : start+6]), + startingBlock: binary.LittleEndian.Uint64(diskBlock), + }) + } + ret = leafNode + default: + var ( + internalNode extentInternalNode + ) + for i := 0; i < int(e.entries); i++ { + start := i*extentTreeEntryLength + extentTreeHeaderLength + diskBlock := make([]byte, 8) + copy(diskBlock[0:4], b[start+4:start+8]) + copy(diskBlock[4:6], b[start+8:start+10]) + ptr := &extentChildPtr{ + diskBlock: binary.LittleEndian.Uint64(diskBlock), + fileBlock: binary.LittleEndian.Uint32(b[start : start+4]), + } + internalNode.children = append(internalNode.children, ptr) + if i > 0 { + internalNode.children[i-1].count = ptr.fileBlock - internalNode.children[i-1].fileBlock + } + } + if len(internalNode.children) > 0 { + internalNode.children[len(internalNode.children)-1].count = start + count - internalNode.children[len(internalNode.children)-1].fileBlock + } + ret = internalNode + } + + return ret, nil +} diff --git a/filesystem/ext4/features.go b/filesystem/ext4/features.go new file mode 100644 index 00000000..9a8baa9e --- /dev/null +++ b/filesystem/ext4/features.go @@ -0,0 +1,451 @@ +package ext4 + +// features are defined +// beginning at https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/ext2_fs.h#n820 + +// featureFlags is a structure holding which flags are set - compatible, incompatible and read-only compatible +type featureFlags struct { + // compatible, incompatible, and compatibleReadOnly feature flags + directoryPreAllocate bool + imagicInodes bool + hasJournal bool + extendedAttributes bool + reservedGDTBlocksForExpansion bool + directoryIndices bool + lazyBlockGroup bool + 
excludeInode bool + excludeBitmap bool + sparseSuperBlockV2 bool + fastCommit bool + stableInodes bool + orphanFile bool + compression bool + directoryEntriesRecordFileType bool + recoveryNeeded bool + separateJournalDevice bool + metaBlockGroups bool + extents bool + fs64Bit bool + multipleMountProtection bool + flexBlockGroups bool + extendedAttributeInodes bool + dataInDirectoryEntries bool + metadataChecksumSeedInSuperblock bool + largeDirectory bool + dataInInode bool + encryptInodes bool + sparseSuperblock bool + largeFile bool + btreeDirectory bool + hugeFile bool + gdtChecksum bool + largeSubdirectoryCount bool + largeInodes bool + snapshot bool + quota bool + bigalloc bool + metadataChecksums bool + replicas bool + readOnly bool + projectQuotas bool +} + +func parseFeatureFlags(compatFlags, incompatFlags, roCompatFlags uint32) featureFlags { + f := featureFlags{ + directoryPreAllocate: compatFeatureDirectoryPreAllocate.included(compatFlags), + imagicInodes: compatFeatureImagicInodes.included(compatFlags), + hasJournal: compatFeatureHasJournal.included(compatFlags), + extendedAttributes: compatFeatureExtendedAttributes.included(compatFlags), + reservedGDTBlocksForExpansion: compatFeatureReservedGDTBlocksForExpansion.included(compatFlags), + directoryIndices: compatFeatureDirectoryIndices.included(compatFlags), + lazyBlockGroup: compatFeatureLazyBlockGroup.included(compatFlags), + excludeInode: compatFeatureExcludeInode.included(compatFlags), + excludeBitmap: compatFeatureExcludeBitmap.included(compatFlags), + sparseSuperBlockV2: compatFeatureSparseSuperBlockV2.included(compatFlags), + fastCommit: compatFeatureFastCommit.included(compatFlags), + stableInodes: compatFeatureStableInodes.included(compatFlags), + orphanFile: compatFeatureOrphanFile.included(compatFlags), + compression: incompatFeatureCompression.included(incompatFlags), + directoryEntriesRecordFileType: incompatFeatureDirectoryEntriesRecordFileType.included(incompatFlags), + recoveryNeeded: 
incompatFeatureRecoveryNeeded.included(incompatFlags), + separateJournalDevice: incompatFeatureSeparateJournalDevice.included(incompatFlags), + metaBlockGroups: incompatFeatureMetaBlockGroups.included(incompatFlags), + extents: incompatFeatureExtents.included(incompatFlags), + fs64Bit: incompatFeature64Bit.included(incompatFlags), + multipleMountProtection: incompatFeatureMultipleMountProtection.included(incompatFlags), + flexBlockGroups: incompatFeatureFlexBlockGroups.included(incompatFlags), + extendedAttributeInodes: incompatFeatureExtendedAttributeInodes.included(incompatFlags), + dataInDirectoryEntries: incompatFeatureDataInDirectoryEntries.included(incompatFlags), + metadataChecksumSeedInSuperblock: incompatFeatureMetadataChecksumSeedInSuperblock.included(incompatFlags), + largeDirectory: incompatFeatureLargeDirectory.included(incompatFlags), + dataInInode: incompatFeatureDataInInode.included(incompatFlags), + encryptInodes: incompatFeatureEncryptInodes.included(incompatFlags), + sparseSuperblock: roCompatFeatureSparseSuperblock.included(roCompatFlags), + largeFile: roCompatFeatureLargeFile.included(roCompatFlags), + btreeDirectory: roCompatFeatureBtreeDirectory.included(roCompatFlags), + hugeFile: roCompatFeatureHugeFile.included(roCompatFlags), + gdtChecksum: roCompatFeatureGDTChecksum.included(roCompatFlags), + largeSubdirectoryCount: roCompatFeatureLargeSubdirectoryCount.included(roCompatFlags), + largeInodes: roCompatFeatureLargeInodes.included(roCompatFlags), + snapshot: roCompatFeatureSnapshot.included(roCompatFlags), + quota: roCompatFeatureQuota.included(roCompatFlags), + bigalloc: roCompatFeatureBigalloc.included(roCompatFlags), + metadataChecksums: roCompatFeatureMetadataChecksums.included(roCompatFlags), + replicas: roCompatFeatureReplicas.included(roCompatFlags), + readOnly: roCompatFeatureReadOnly.included(roCompatFlags), + projectQuotas: roCompatFeatureProjectQuotas.included(roCompatFlags), + } + + return f +} + +//nolint:gocyclo // we know 
this has cyclomatic complexity, but not worth breaking apart +func (f *featureFlags) toInts() (compatFlags, incompatFlags, roCompatFlags uint32) { + // compatible flags + if f.directoryPreAllocate { + compatFlags |= uint32(compatFeatureDirectoryPreAllocate) + } + if f.imagicInodes { + compatFlags |= uint32(compatFeatureImagicInodes) + } + if f.hasJournal { + compatFlags |= uint32(compatFeatureHasJournal) + } + if f.extendedAttributes { + compatFlags |= uint32(compatFeatureExtendedAttributes) + } + if f.reservedGDTBlocksForExpansion { + compatFlags |= uint32(compatFeatureReservedGDTBlocksForExpansion) + } + if f.directoryIndices { + compatFlags |= uint32(compatFeatureDirectoryIndices) + } + if f.lazyBlockGroup { + compatFlags |= uint32(compatFeatureLazyBlockGroup) + } + if f.excludeInode { + compatFlags |= uint32(compatFeatureExcludeInode) + } + if f.excludeBitmap { + compatFlags |= uint32(compatFeatureExcludeBitmap) + } + if f.sparseSuperBlockV2 { + compatFlags |= uint32(compatFeatureSparseSuperBlockV2) + } + if f.fastCommit { + compatFlags |= uint32(compatFeatureFastCommit) + } + if f.stableInodes { + compatFlags |= uint32(compatFeatureStableInodes) + } + if f.orphanFile { + compatFlags |= uint32(compatFeatureOrphanFile) + } + + // incompatible flags + if f.compression { + incompatFlags |= uint32(incompatFeatureCompression) + } + if f.directoryEntriesRecordFileType { + incompatFlags |= uint32(incompatFeatureDirectoryEntriesRecordFileType) + } + if f.recoveryNeeded { + incompatFlags |= uint32(incompatFeatureRecoveryNeeded) + } + if f.separateJournalDevice { + incompatFlags |= uint32(incompatFeatureSeparateJournalDevice) + } + if f.metaBlockGroups { + incompatFlags |= uint32(incompatFeatureMetaBlockGroups) + } + if f.extents { + incompatFlags |= uint32(incompatFeatureExtents) + } + if f.fs64Bit { + incompatFlags |= uint32(incompatFeature64Bit) + } + if f.multipleMountProtection { + incompatFlags |= uint32(incompatFeatureMultipleMountProtection) + } + if 
f.flexBlockGroups { + incompatFlags |= uint32(incompatFeatureFlexBlockGroups) + } + if f.extendedAttributeInodes { + incompatFlags |= uint32(incompatFeatureExtendedAttributeInodes) + } + if f.dataInDirectoryEntries { + incompatFlags |= uint32(incompatFeatureDataInDirectoryEntries) + } + if f.metadataChecksumSeedInSuperblock { + incompatFlags |= uint32(incompatFeatureMetadataChecksumSeedInSuperblock) + } + if f.largeDirectory { + incompatFlags |= uint32(incompatFeatureLargeDirectory) + } + if f.dataInInode { + incompatFlags |= uint32(incompatFeatureDataInInode) + } + if f.encryptInodes { + incompatFlags |= uint32(incompatFeatureEncryptInodes) + } + + // read only compatible flags + if f.sparseSuperblock { + roCompatFlags |= uint32(roCompatFeatureSparseSuperblock) + } + if f.largeFile { + roCompatFlags |= uint32(roCompatFeatureLargeFile) + } + if f.btreeDirectory { + roCompatFlags |= uint32(roCompatFeatureBtreeDirectory) + } + if f.hugeFile { + roCompatFlags |= uint32(roCompatFeatureHugeFile) + } + if f.gdtChecksum { + roCompatFlags |= uint32(roCompatFeatureGDTChecksum) + } + if f.largeSubdirectoryCount { + roCompatFlags |= uint32(roCompatFeatureLargeSubdirectoryCount) + } + if f.largeInodes { + roCompatFlags |= uint32(roCompatFeatureLargeInodes) + } + if f.snapshot { + roCompatFlags |= uint32(roCompatFeatureSnapshot) + } + if f.quota { + roCompatFlags |= uint32(roCompatFeatureQuota) + } + if f.bigalloc { + roCompatFlags |= uint32(roCompatFeatureBigalloc) + } + if f.metadataChecksums { + roCompatFlags |= uint32(roCompatFeatureMetadataChecksums) + } + if f.replicas { + roCompatFlags |= uint32(roCompatFeatureReplicas) + } + if f.readOnly { + roCompatFlags |= uint32(roCompatFeatureReadOnly) + } + if f.projectQuotas { + roCompatFlags |= uint32(roCompatFeatureProjectQuotas) + } + + return compatFlags, incompatFlags, roCompatFlags +} + +// default features +/* + base_features = sparse_super,large_file,filetype,resize_inode,dir_index,ext_attr + features = 
has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize +*/ +var defaultFeatureFlags = featureFlags{ + largeFile: true, + hugeFile: true, + sparseSuperblock: true, + flexBlockGroups: true, + hasJournal: true, + extents: true, + fs64Bit: true, + extendedAttributes: true, +} + +type FeatureOpt func(*featureFlags) + +func WithFeatureDirectoryPreAllocate(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryPreAllocate = enable + } +} +func WithFeatureImagicInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.imagicInodes = enable + } +} +func WithFeatureHasJournal(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.hasJournal = enable + } +} +func WithFeatureExtendedAttributes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extendedAttributes = enable + } +} +func WithFeatureReservedGDTBlocksForExpansion(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.reservedGDTBlocksForExpansion = enable + } +} +func WithFeatureDirectoryIndices(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryIndices = enable + } +} +func WithFeatureLazyBlockGroup(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.lazyBlockGroup = enable + } +} +func WithFeatureExcludeInode(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.excludeInode = enable + } +} +func WithFeatureExcludeBitmap(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.excludeBitmap = enable + } +} +func WithFeatureSparseSuperBlockV2(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.sparseSuperBlockV2 = enable + } +} +func WithFeatureCompression(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.compression = enable + } +} +func WithFeatureDirectoryEntriesRecordFileType(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryEntriesRecordFileType = enable + } +} +func WithFeatureRecoveryNeeded(enable bool) FeatureOpt { + return func(o 
*featureFlags) { + o.recoveryNeeded = enable + } +} +func WithFeatureSeparateJournalDevice(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.separateJournalDevice = enable + } +} +func WithFeatureMetaBlockGroups(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metaBlockGroups = enable + } +} +func WithFeatureExtents(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extents = enable + } +} +func WithFeatureFS64Bit(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.fs64Bit = enable + } +} +func WithFeatureMultipleMountProtection(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.multipleMountProtection = enable + } +} +func WithFeatureFlexBlockGroups(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.flexBlockGroups = enable + } +} +func WithFeatureExtendedAttributeInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extendedAttributeInodes = enable + } +} +func WithFeatureDataInDirectoryEntries(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.dataInDirectoryEntries = enable + } +} +func WithFeatureMetadataChecksumSeedInSuperblock(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metadataChecksumSeedInSuperblock = enable + } +} +func WithFeatureLargeDirectory(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeDirectory = enable + } +} +func WithFeatureDataInInode(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.dataInInode = enable + } +} +func WithFeatureEncryptInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.encryptInodes = enable + } +} +func WithFeatureSparseSuperblock(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.sparseSuperblock = enable + } +} +func WithFeatureLargeFile(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeFile = enable + } +} +func WithFeatureBTreeDirectory(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.btreeDirectory = enable 
+ } +} +func WithFeatureHugeFile(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.hugeFile = enable + } +} +func WithFeatureGDTChecksum(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.gdtChecksum = enable + } +} +func WithFeatureLargeSubdirectoryCount(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeSubdirectoryCount = enable + } +} +func WithFeatureLargeInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeInodes = enable + } +} +func WithFeatureSnapshot(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.snapshot = enable + } +} +func WithFeatureQuota(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.quota = enable + } +} +func WithFeatureBigalloc(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.bigalloc = enable + } +} +func WithFeatureMetadataChecksums(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metadataChecksums = enable + } +} +func WithFeatureReplicas(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.replicas = enable + } +} +func WithFeatureReadOnly(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.readOnly = enable + } +} +func WithFeatureProjectQuotas(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.projectQuotas = enable + } +} diff --git a/filesystem/ext4/file.go b/filesystem/ext4/file.go new file mode 100644 index 00000000..337dcba1 --- /dev/null +++ b/filesystem/ext4/file.go @@ -0,0 +1,118 @@ +package ext4 + +import ( + "errors" + "fmt" + "io" +) + +// File represents a single file in an ext4 filesystem +type File struct { + *directoryEntry + *inode + isReadWrite bool + isAppend bool + offset int64 + filesystem *FileSystem + extents extents +} + +// Read reads up to len(b) bytes from the File. +// It returns the number of bytes read and any error encountered. 
+// At end of file, Read returns 0, io.EOF +// reads from the last known offset in the file from last read or write +// use Seek() to set at a particular point +func (fl *File) Read(b []byte) (int, error) { + var ( + fileSize = int64(fl.size) + blocksize = uint64(fl.filesystem.superblock.blockSize) + ) + if fl.offset >= fileSize { + return 0, io.EOF + } + + // Calculate the number of bytes to read + bytesToRead := int64(len(b)) + if fl.offset+bytesToRead > fileSize { + bytesToRead = fileSize - fl.offset + } + + // Create a buffer to hold the bytes to be read + readBytes := int64(0) + b = b[:bytesToRead] + + // the offset given for reading is relative to the file, so we need to calculate + // where these are in the extents relative to the file + readStartBlock := uint64(fl.offset) / blocksize + for _, e := range fl.extents { + // if the last block of the extent is before the first block we want to read, skip it + if uint64(e.fileBlock)+uint64(e.count) < readStartBlock { + continue + } + // extentSize is the number of bytes on the disk for the extent + extentSize := int64(e.count) * int64(blocksize) + // where do we start and end in the extent? 
+ startPositionInExtent := fl.offset - int64(e.fileBlock)*int64(blocksize) + leftInExtent := extentSize - startPositionInExtent + // how many bytes are left to read + toReadInOffset := bytesToRead - readBytes + if toReadInOffset > leftInExtent { + toReadInOffset = leftInExtent + } + // read those bytes + startPosOnDisk := e.startingBlock*blocksize + uint64(startPositionInExtent) + b2 := make([]byte, toReadInOffset) + read, err := fl.filesystem.file.ReadAt(b2, int64(startPosOnDisk)) + if err != nil { + return int(readBytes), fmt.Errorf("failed to read bytes: %v", err) + } + copy(b[readBytes:], b2[:read]) + readBytes += int64(read) + fl.offset += int64(read) + + if readBytes >= bytesToRead { + break + } + } + var err error + if fl.offset >= fileSize { + err = io.EOF + } + + return int(readBytes), err +} + +// Write writes len(b) bytes to the File. +// It returns the number of bytes written and an error, if any. +// returns a non-nil error when n != len(b) +// writes to the last known offset in the file from last read or write +// use Seek() to set at a particular point +// +//nolint:revive // params not used because still read-only, will be used in the future when read-write +func (fl *File) Write(p []byte) (int, error) { + return 0, errors.New("not implemented") +} + +// Seek set the offset to a particular point in the file +func (fl *File) Seek(offset int64, whence int) (int64, error) { + newOffset := int64(0) + switch whence { + case io.SeekStart: + newOffset = offset + case io.SeekEnd: + newOffset = int64(fl.size) + offset + case io.SeekCurrent: + newOffset = fl.offset + offset + } + if newOffset < 0 { + return fl.offset, fmt.Errorf("cannot set offset %d before start of file", offset) + } + fl.offset = newOffset + return fl.offset, nil +} + +// Close close a file that is being read +func (fl *File) Close() error { + *fl = File{} + return nil +} diff --git a/filesystem/ext4/fileinfo.go b/filesystem/ext4/fileinfo.go new file mode 100644 index 00000000..4449c284 --- 
// FileInfo holds the metadata for a single file and satisfies the
// os.FileInfo interface.
type FileInfo struct {
	modTime time.Time
	mode    os.FileMode
	name    string
	size    int64
	isDir   bool
}

// Name returns the base name of the file.
func (fi FileInfo) Name() string { return fi.name }

// Size returns the length in bytes for regular files.
func (fi FileInfo) Size() int64 { return fi.size }

// Mode returns the file mode bits.
func (fi FileInfo) Mode() os.FileMode { return fi.mode }

// ModTime returns the modification time.
func (fi FileInfo) ModTime() time.Time { return fi.modTime }

// IsDir reports whether the entry is a directory; shorthand for Mode().IsDir().
func (fi FileInfo) IsDir() bool { return fi.isDir }

// Sys returns the underlying data source; not supported, so always nil.
func (fi FileInfo) Sys() interface{} { return nil }
gdtChecksumType = 2 +) + +type blockGroupFlags struct { + inodesUninitialized bool + blockBitmapUninitialized bool + inodeTableZeroed bool +} + +// groupdescriptors is a structure holding all of the group descriptors for all of the block groups +type groupDescriptors struct { + descriptors []groupDescriptor +} + +// groupDescriptor is a structure holding the data about a single block group +type groupDescriptor struct { + blockBitmapLocation uint64 + inodeBitmapLocation uint64 + inodeTableLocation uint64 + freeBlocks uint32 + freeInodes uint32 + usedDirectories uint32 + flags blockGroupFlags + snapshotExclusionBitmapLocation uint64 + blockBitmapChecksum uint32 + inodeBitmapChecksum uint32 + unusedInodes uint32 + size uint16 + number uint16 +} + +func (gd *groupDescriptor) equal(other *groupDescriptor) bool { + if other == nil { + return gd == nil + } + return *gd == *other +} + +func (gds *groupDescriptors) equal(a *groupDescriptors) bool { + if gds == nil && a == nil { + return true + } + if (gds == nil && a != nil) || (a == nil && gds != nil) || len(gds.descriptors) != len(a.descriptors) { + return false + } + + // both not nil, same size, so compare them + for i, g := range gds.descriptors { + if g != a.descriptors[i] { + return false + } + } + // if we made it this far, all the same + return true +} + +// groupDescriptorsFromBytes create a groupDescriptors struct from bytes +func groupDescriptorsFromBytes(b []byte, gdSize uint16, hashSeed uint32, checksumType gdtChecksumType) (*groupDescriptors, error) { + gds := groupDescriptors{} + gdSlice := make([]groupDescriptor, 0, 10) + + count := len(b) / int(gdSize) + + // go through them gdSize bytes at a time + for i := 0; i < count; i++ { + start := i * int(gdSize) + end := start + int(gdSize) + gd, err := groupDescriptorFromBytes(b[start:end], gdSize, i, checksumType, hashSeed) + if err != nil || gd == nil { + return nil, fmt.Errorf("error creating group descriptor from bytes: %w", err) + } + gdSlice = 
append(gdSlice, *gd) + } + gds.descriptors = gdSlice + + return &gds, nil +} + +// toBytes returns groupDescriptors ready to be written to disk +func (gds *groupDescriptors) toBytes(checksumType gdtChecksumType, hashSeed uint32) []byte { + b := make([]byte, 0, 10*groupDescriptorSize) + for _, gd := range gds.descriptors { + b2 := gd.toBytes(checksumType, hashSeed) + b = append(b, b2...) + } + + return b +} + +// groupDescriptorFromBytes create a groupDescriptor struct from bytes +func groupDescriptorFromBytes(b []byte, gdSize uint16, number int, checksumType gdtChecksumType, hashSeed uint32) (*groupDescriptor, error) { + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + blockBitmapLocation := make([]byte, 8) + inodeBitmapLocation := make([]byte, 8) + inodeTableLocation := make([]byte, 8) + freeBlocks := make([]byte, 4) + freeInodes := make([]byte, 4) + usedirectories := make([]byte, 4) + snapshotExclusionBitmapLocation := make([]byte, 8) + blockBitmapChecksum := make([]byte, 4) + inodeBitmapChecksum := make([]byte, 4) + unusedInodes := make([]byte, 4) + + copy(blockBitmapLocation[0:4], b[0x0:0x4]) + copy(inodeBitmapLocation[0:4], b[0x4:0x8]) + copy(inodeTableLocation[0:4], b[0x8:0xc]) + copy(freeBlocks[0:2], b[0xc:0xe]) + copy(freeInodes[0:2], b[0xe:0x10]) + copy(usedirectories[0:2], b[0x10:0x12]) + copy(snapshotExclusionBitmapLocation[0:4], b[0x14:0x18]) + copy(blockBitmapChecksum[0:2], b[0x18:0x1a]) + copy(inodeBitmapChecksum[0:2], b[0x1a:0x1c]) + copy(unusedInodes[0:2], b[0x1c:0x1e]) + + if gdSize == 64 { + copy(blockBitmapLocation[4:8], b[0x20:0x24]) + copy(inodeBitmapLocation[4:8], b[0x24:0x28]) + copy(inodeTableLocation[4:8], b[0x28:0x2c]) + copy(freeBlocks[2:4], b[0x2c:0x2e]) + copy(freeInodes[2:4], b[0x2e:0x30]) + copy(usedirectories[2:4], b[0x30:0x32]) + copy(unusedInodes[2:4], b[0x32:0x34]) + copy(snapshotExclusionBitmapLocation[4:8], b[0x34:0x38]) + copy(blockBitmapChecksum[2:4], b[0x38:0x3a]) + 
copy(inodeBitmapChecksum[2:4], b[0x3a:0x3c]) + } + + gdNumber := uint16(number) + // only bother with checking the checksum if it was not type none (pre-checksums) + if checksumType != gdtChecksumNone { + checksum := binary.LittleEndian.Uint16(b[0x1e:0x20]) + actualChecksum := groupDescriptorChecksum(b[0x0:0x40], hashSeed, gdNumber, checksumType) + if checksum != actualChecksum { + return nil, fmt.Errorf("checksum mismatch, passed %x, actual %x", checksum, actualChecksum) + } + } + + gd := groupDescriptor{ + size: gdSize, + number: gdNumber, + blockBitmapLocation: binary.LittleEndian.Uint64(blockBitmapLocation), + inodeBitmapLocation: binary.LittleEndian.Uint64(inodeBitmapLocation), + inodeTableLocation: binary.LittleEndian.Uint64(inodeTableLocation), + freeBlocks: binary.LittleEndian.Uint32(freeBlocks), + freeInodes: binary.LittleEndian.Uint32(freeInodes), + usedDirectories: binary.LittleEndian.Uint32(usedirectories), + snapshotExclusionBitmapLocation: binary.LittleEndian.Uint64(snapshotExclusionBitmapLocation), + blockBitmapChecksum: binary.LittleEndian.Uint32(blockBitmapChecksum), + inodeBitmapChecksum: binary.LittleEndian.Uint32(inodeBitmapChecksum), + unusedInodes: binary.LittleEndian.Uint32(unusedInodes), + flags: parseBlockGroupFlags(binary.LittleEndian.Uint16(b[0x12:0x14])), + } + + return &gd, nil +} + +// toBytes returns a groupDescriptor ready to be written to disk +func (gd *groupDescriptor) toBytes(checksumType gdtChecksumType, hashSeed uint32) []byte { + gdSize := gd.size + + b := make([]byte, gdSize) + + blockBitmapLocation := make([]byte, 8) + inodeBitmapLocation := make([]byte, 8) + inodeTableLocation := make([]byte, 8) + freeBlocks := make([]byte, 4) + freeInodes := make([]byte, 4) + usedirectories := make([]byte, 4) + snapshotExclusionBitmapLocation := make([]byte, 8) + blockBitmapChecksum := make([]byte, 4) + inodeBitmapChecksum := make([]byte, 4) + unusedInodes := make([]byte, 4) + + binary.LittleEndian.PutUint64(blockBitmapLocation, 
gd.blockBitmapLocation) + binary.LittleEndian.PutUint64(inodeTableLocation, gd.inodeTableLocation) + binary.LittleEndian.PutUint64(inodeBitmapLocation, gd.inodeBitmapLocation) + binary.LittleEndian.PutUint32(freeBlocks, gd.freeBlocks) + binary.LittleEndian.PutUint32(freeInodes, gd.freeInodes) + binary.LittleEndian.PutUint32(usedirectories, gd.usedDirectories) + binary.LittleEndian.PutUint64(snapshotExclusionBitmapLocation, gd.snapshotExclusionBitmapLocation) + binary.LittleEndian.PutUint32(blockBitmapChecksum, gd.blockBitmapChecksum) + binary.LittleEndian.PutUint32(inodeBitmapChecksum, gd.inodeBitmapChecksum) + binary.LittleEndian.PutUint32(unusedInodes, gd.unusedInodes) + + // copy the lower 32 bytes in + copy(b[0x0:0x4], blockBitmapLocation[0:4]) + copy(b[0x4:0x8], inodeBitmapLocation[0:4]) + copy(b[0x8:0xc], inodeTableLocation[0:4]) + copy(b[0xc:0xe], freeBlocks[0:2]) + copy(b[0xe:0x10], freeInodes[0:2]) + copy(b[0x10:0x12], usedirectories[0:2]) + binary.LittleEndian.PutUint16(b[0x12:0x14], gd.flags.toInt()) + copy(b[0x14:0x18], snapshotExclusionBitmapLocation[0:4]) + copy(b[0x18:0x1a], blockBitmapChecksum[0:2]) + copy(b[0x1a:0x1c], inodeBitmapChecksum[0:2]) + copy(b[0x1c:0x1e], unusedInodes[0:2]) + + // now for the upper 32 bytes + if gd.size == 64 { + copy(b[0x20:0x24], blockBitmapLocation[4:8]) + copy(b[0x24:0x28], inodeBitmapLocation[4:8]) + copy(b[0x28:0x2c], inodeTableLocation[4:8]) + copy(b[0x2c:0x2e], freeBlocks[2:4]) + copy(b[0x2e:0x30], freeInodes[2:4]) + copy(b[0x30:0x32], usedirectories[2:4]) + copy(b[0x32:0x34], unusedInodes[2:4]) + copy(b[0x34:0x38], snapshotExclusionBitmapLocation[4:8]) + copy(b[0x38:0x3a], blockBitmapChecksum[2:4]) + copy(b[0x3a:0x3c], inodeBitmapChecksum[2:4]) + } + + checksum := groupDescriptorChecksum(b[0x0:0x40], hashSeed, gd.number, checksumType) + binary.LittleEndian.PutUint16(b[0x1e:0x20], checksum) + + return b +} + +func parseBlockGroupFlags(flags uint16) blockGroupFlags { + f := blockGroupFlags{ + inodeTableZeroed: 
blockGroupFlagInodeTableZeroed.included(flags), + inodesUninitialized: blockGroupFlagInodesUninitialized.included(flags), + blockBitmapUninitialized: blockGroupFlagBlockBitmapUninitialized.included(flags), + } + + return f +} + +func (f *blockGroupFlags) toInt() uint16 { + var ( + flags uint16 + ) + + // compatible flags + if f.inodeTableZeroed { + flags |= uint16(blockGroupFlagInodeTableZeroed) + } + if f.inodesUninitialized { + flags |= uint16(blockGroupFlagInodesUninitialized) + } + if f.blockBitmapUninitialized { + flags |= uint16(blockGroupFlagBlockBitmapUninitialized) + } + return flags +} + +// groupDescriptorChecksum calculate the checksum for a block group descriptor +// NOTE: we are assuming that the block group number is uint64, but we do not know that to be true +// +// it might be uint32 or uint64, and it might be in BigEndian as opposed to LittleEndian +// just have to start with this and see +// we do know that the maximum number of block groups in 32-bit mode is 2^19, which must be uint32 +// and in 64-bit mode it is 2^51 which must be uint64 +// So we start with uint32 = [4]byte{} for regular mode and [8]byte{} for mod32 +func groupDescriptorChecksum(b []byte, hashSeed uint32, groupNumber uint16, checksumType gdtChecksumType) uint16 { + var checksum uint16 + + numBytes := make([]byte, 4) + binary.LittleEndian.PutUint16(numBytes, groupNumber) + switch checksumType { + case gdtChecksumNone: + checksum = 0 + case gdtChecksumMetadata: + // metadata checksum applies groupNumber to seed, then zeroes out checksum bytes from entire descriptor, then applies descriptor bytes + crcResult := crc.CRC32c(hashSeed, numBytes) + b2 := make([]byte, len(b)) + copy(b2, b) + b2[0x1e] = 0 + b2[0x1f] = 0 + crcResult = crc.CRC32c(crcResult, b2) + checksum = uint16(crcResult & 0xffff) + case gdtChecksumGdt: + hashSeed16 := uint16(hashSeed & 0xffff) + crcResult := crc.CRC16(hashSeed16, numBytes) + b2 := make([]byte, len(b)) + copy(b2, b) + b2[0x1e] = 0 + b2[0x1f] = 0 + 
checksum = crc.CRC16(crcResult, b) + } + return checksum +} diff --git a/filesystem/ext4/groupdescriptors_test.go b/filesystem/ext4/groupdescriptors_test.go new file mode 100644 index 00000000..27f87d23 --- /dev/null +++ b/filesystem/ext4/groupdescriptors_test.go @@ -0,0 +1,101 @@ +package ext4 + +import ( + "fmt" + "os" + "testing" + + "github.com/go-test/deep" +) + +func testGetValidRootDirectory() (dir *Directory, bytesPerBlock uint32, contents []byte, err error) { + // read the root directory file, which was created from debugfs + rootDirEntries, err := testDirEntriesFromDebugFS(rootDirFile) + if err != nil { + return nil, 0, nil, fmt.Errorf("error reading root directory entries from debugfs: %w", err) + } + dir = &Directory{ + root: true, + entries: rootDirEntries, + } + + testfile := testRootDirFile + // read the bytes from the disk + b, err := os.ReadFile(testfile) + if err != nil { + return nil, 0, nil, fmt.Errorf("Failed to read %s", testfile) + } + + return dir, 1024, b, nil +} + +func TestGroupDescriptorFromBytes(t *testing.T) { + sb, gds, _, b, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Error getting valid superblock: %v", err) + } + // we know which one we are reading + expected := &gds[0] + if err != nil { + t.Fatalf("Error getting valid group descriptor: %v", err) + } + gd, err := groupDescriptorFromBytes(b, sb.groupDescriptorSize, int(expected.number), sb.gdtChecksumType(), sb.checksumSeed) + if err != nil { + t.Errorf("Error parsing group descriptor: %v", err) + } + deep.CompareUnexportedFields = true + if diff := deep.Equal(gd, expected); diff != nil { + t.Errorf("groupDescriptorFromBytes() = %v", diff) + } +} + +func TestGroupDescriptorToBytes(t *testing.T) { + sb, gds, _, expected, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Error getting valid superblock: %v", err) + } + gd := &gds[0] + if err != nil { + t.Fatalf("Error getting valid group descriptor: %v", err) + } + b := 
gd.toBytes(sb.gdtChecksumType(), sb.checksumSeed) + expected = expected[:64] + diff, diffString := dumpByteSlicesWithDiffs(b, expected, 32, false, true, true) + if diff { + t.Errorf("groupdescriptor.toBytes() mismatched, actual then expected\n%s", diffString) + } +} + +func TestGroupDescriptorsFromBytes(t *testing.T) { + sb, expected, _, b, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Error getting valid superblock: %v", err) + } + gds, err := groupDescriptorsFromBytes(b, sb.groupDescriptorSize, sb.checksumSeed, sb.gdtChecksumType()) + if err != nil { + t.Errorf("Error parsing group descriptor: %v", err) + } + expectedGDS := &groupDescriptors{ + descriptors: expected, + } + deep.CompareUnexportedFields = true + if diff := deep.Equal(gds, expectedGDS); diff != nil { + t.Errorf("groupDescriptorsFromBytes() = %v", diff) + } +} + +func TestGroupDescriptorsToBytes(t *testing.T) { + sb, groupdescriptors, _, expected, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Error getting valid superblock: %v", err) + } + + gds := &groupDescriptors{ + descriptors: groupdescriptors, + } + b := gds.toBytes(sb.gdtChecksumType(), sb.checksumSeed) + diff, diffString := dumpByteSlicesWithDiffs(b, expected, 32, false, true, true) + if diff { + t.Errorf("groupDescriptors.toBytes() mismatched, actual then expected\n%s", diffString) + } +} diff --git a/filesystem/ext4/inode.go b/filesystem/ext4/inode.go new file mode 100644 index 00000000..01a077bc --- /dev/null +++ b/filesystem/ext4/inode.go @@ -0,0 +1,588 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "time" + + "github.com/diskfs/go-diskfs/filesystem/ext4/crc" +) + +type inodeFlag uint32 +type fileType uint16 + +func (i inodeFlag) included(a uint32) bool { + return a&uint32(i) == uint32(i) +} + +const ( + ext2InodeSize uint16 = 128 + // minInodeSize is ext2 + the extra min 32 bytes in ext4 + minInodeExtraSize uint16 = 32 + wantInodeExtraSize uint16 = 128 + minInodeSize uint16 = 
ext2InodeSize + minInodeExtraSize + extentInodeMaxEntries int = 4 + inodeFlagSecureDeletion inodeFlag = 0x1 + inodeFlagPreserveForUndeletion inodeFlag = 0x2 + inodeFlagCompressed inodeFlag = 0x4 + inodeFlagSynchronous inodeFlag = 0x8 + inodeFlagImmutable inodeFlag = 0x10 + inodeFlagAppendOnly inodeFlag = 0x20 + inodeFlagNoDump inodeFlag = 0x40 + inodeFlagNoAccessTimeUpdate inodeFlag = 0x80 + inodeFlagDirtyCompressed inodeFlag = 0x100 + inodeFlagCompressedClusters inodeFlag = 0x200 + inodeFlagNoCompress inodeFlag = 0x400 + inodeFlagEncryptedInode inodeFlag = 0x800 + inodeFlagHashedDirectoryIndexes inodeFlag = 0x1000 + inodeFlagAFSMagicDirectory inodeFlag = 0x2000 + inodeFlagAlwaysJournal inodeFlag = 0x4000 + inodeFlagNoMergeTail inodeFlag = 0x8000 + inodeFlagSyncDirectoryData inodeFlag = 0x10000 + inodeFlagTopDirectory inodeFlag = 0x20000 + inodeFlagHugeFile inodeFlag = 0x40000 + inodeFlagUsesExtents inodeFlag = 0x80000 + inodeFlagExtendedAttributes inodeFlag = 0x200000 + inodeFlagBlocksPastEOF inodeFlag = 0x400000 + inodeFlagSnapshot inodeFlag = 0x1000000 + inodeFlagDeletingSnapshot inodeFlag = 0x4000000 + inodeFlagCompletedSnapshotShrink inodeFlag = 0x8000000 + inodeFlagInlineData inodeFlag = 0x10000000 + inodeFlagInheritProject inodeFlag = 0x20000000 + + fileTypeFifo fileType = 0x1000 + fileTypeCharacterDevice fileType = 0x2000 + fileTypeDirectory fileType = 0x4000 + fileTypeBlockDevice fileType = 0x6000 + fileTypeRegularFile fileType = 0x8000 + fileTypeSymbolicLink fileType = 0xA000 + fileTypeSocket fileType = 0xC000 + + filePermissionsOwnerExecute uint16 = 0x40 + filePermissionsOwnerWrite uint16 = 0x80 + filePermissionsOwnerRead uint16 = 0x100 + filePermissionsGroupExecute uint16 = 0x8 + filePermissionsGroupWrite uint16 = 0x10 + filePermissionsGroupRead uint16 = 0x20 + filePermissionsOtherExecute uint16 = 0x1 + filePermissionsOtherWrite uint16 = 0x2 + filePermissionsOtherRead uint16 = 0x4 +) + +// mountOptions is a structure holding flags for an inode +type 
inodeFlags struct { + secureDeletion bool + preserveForUndeletion bool + compressed bool + synchronous bool + immutable bool + appendOnly bool + noDump bool + noAccessTimeUpdate bool + dirtyCompressed bool + compressedClusters bool + noCompress bool + encryptedInode bool + hashedDirectoryIndexes bool + AFSMagicDirectory bool + alwaysJournal bool + noMergeTail bool + syncDirectoryData bool + topDirectory bool + hugeFile bool + usesExtents bool + extendedAttributes bool + blocksPastEOF bool + snapshot bool + deletingSnapshot bool + completedSnapshotShrink bool + inlineData bool + inheritProject bool +} + +type filePermissions struct { + read bool + write bool + execute bool +} + +// inode is a structure holding the data about an inode +type inode struct { + number uint32 + permissionsOther filePermissions + permissionsGroup filePermissions + permissionsOwner filePermissions + fileType fileType + owner uint32 + group uint32 + size uint64 + accessTime time.Time + changeTime time.Time + modifyTime time.Time + createTime time.Time + deletionTime uint32 + hardLinks uint16 + blocks uint64 + filesystemBlocks bool + flags *inodeFlags + version uint64 + nfsFileVersion uint32 + extendedAttributeBlock uint64 + inodeSize uint16 + project uint32 + extents extentBlockFinder + linkTarget string +} + +//nolint:unused // will be used in the future, not yet +func (i *inode) equal(a *inode) bool { + if (i == nil && a != nil) || (a == nil && i != nil) { + return false + } + if i == nil && a == nil { + return true + } + return *i == *a +} + +// inodeFromBytes create an inode struct from bytes +func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { + // safely make sure it is the min size + if len(b) < int(minInodeSize) { + return nil, fmt.Errorf("inode data too short: %d bytes, must be min %d bytes", len(b), minInodeSize) + } + + // checksum before using the data + checksumBytes := make([]byte, 4) + + // checksum before using the data + copy(checksumBytes[0:2], 
b[0x7c:0x7e]) + copy(checksumBytes[2:4], b[0x82:0x84]) + // zero out checksum fields before calculating the checksum + b[0x7c] = 0 + b[0x7d] = 0 + b[0x82] = 0 + b[0x83] = 0 + + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + owner := make([]byte, 4) + fileSize := make([]byte, 8) + group := make([]byte, 4) + accessTime := make([]byte, 8) + changeTime := make([]byte, 8) + modifyTime := make([]byte, 8) + createTime := make([]byte, 8) + version := make([]byte, 8) + extendedAttributeBlock := make([]byte, 8) + + mode := binary.LittleEndian.Uint16(b[0x0:0x2]) + + copy(owner[0:2], b[0x2:0x4]) + copy(owner[2:4], b[0x78:0x7a]) + copy(group[0:2], b[0x18:0x20]) + copy(group[2:4], b[0x7a:0x7c]) + copy(fileSize[0:4], b[0x4:0x8]) + copy(fileSize[4:8], b[0x6c:0x70]) + copy(version[0:4], b[0x24:0x28]) + copy(version[4:8], b[0x98:0x9c]) + copy(extendedAttributeBlock[0:4], b[0x88:0x8c]) + copy(extendedAttributeBlock[4:6], b[0x76:0x78]) + + // get the the times + // the structure is as follows: + // original 32 bits (0:4) are seconds. 
Add (to the left) 2 more bits from the 32 + // the remaining 30 bites are nanoseconds + copy(accessTime[0:4], b[0x8:0xc]) + // take the two bits relevant and add to fifth byte + accessTime[4] = b[0x8c] & 0x3 + copy(changeTime[0:4], b[0xc:0x10]) + changeTime[4] = b[0x84] & 0x3 + copy(modifyTime[0:4], b[0x10:0x14]) + modifyTime[4] = b[0x88] & 0x3 + copy(createTime[0:4], b[0x90:0x94]) + createTime[4] = b[0x94] & 0x3 + + accessTimeSeconds := binary.LittleEndian.Uint64(accessTime) + changeTimeSeconds := binary.LittleEndian.Uint64(changeTime) + modifyTimeSeconds := binary.LittleEndian.Uint64(modifyTime) + createTimeSeconds := binary.LittleEndian.Uint64(createTime) + + // now get the nanoseconds by using the upper 30 bites + accessTimeNanoseconds := binary.LittleEndian.Uint32(b[0x8c:0x90]) >> 2 + changeTimeNanoseconds := binary.LittleEndian.Uint32(b[0x84:0x88]) >> 2 + modifyTimeNanoseconds := binary.LittleEndian.Uint32(b[0x88:0x8c]) >> 2 + createTimeNanoseconds := binary.LittleEndian.Uint32(b[0x94:0x98]) >> 2 + + flagsNum := binary.LittleEndian.Uint32(b[0x20:0x24]) + + flags := parseInodeFlags(flagsNum) + + blocksLow := binary.LittleEndian.Uint32(b[0x1c:0x20]) + blocksHigh := binary.LittleEndian.Uint16(b[0x74:0x76]) + var ( + blocks uint64 + filesystemBlocks bool + ) + + hugeFile := sb.features.hugeFile + switch { + case !hugeFile: + // just 512-byte blocks + blocks = uint64(blocksLow) + filesystemBlocks = false + case hugeFile && !flags.hugeFile: + // larger number of 512-byte blocks + blocks = uint64(blocksHigh)<<32 + uint64(blocksLow) + filesystemBlocks = false + default: + // larger number of filesystem blocks + blocks = uint64(blocksHigh)<<32 + uint64(blocksLow) + filesystemBlocks = true + } + fileType := parseFileType(mode) + fileSizeNum := binary.LittleEndian.Uint64(fileSize) + + extentInfo := make([]byte, 60) + copy(extentInfo, b[0x28:0x64]) + // symlinks might store link target in extentInfo, or might store them elsewhere + var ( + linkTarget string + allExtents 
extentBlockFinder + err error + ) + if fileType == fileTypeSymbolicLink && fileSizeNum < 60 { + linkTarget = string(extentInfo[:fileSizeNum]) + } else { + // parse the extent information in the inode to get the root of the extents tree + // we do not walk the entire tree, to get a slice of blocks for the file. + // If we want to do that, we call the extentBlockFinder.toBlocks() method + allExtents, err = parseExtents(extentInfo, sb.blockSize, 0, uint32(blocks)) + if err != nil { + return nil, fmt.Errorf("error parsing extent tree: %v", err) + } + } + + i := inode{ + number: number, + permissionsGroup: parseGroupPermissions(mode), + permissionsOwner: parseOwnerPermissions(mode), + permissionsOther: parseOtherPermissions(mode), + fileType: fileType, + owner: binary.LittleEndian.Uint32(owner), + group: binary.LittleEndian.Uint32(group), + size: fileSizeNum, + hardLinks: binary.LittleEndian.Uint16(b[0x1a:0x1c]), + blocks: blocks, + filesystemBlocks: filesystemBlocks, + flags: &flags, + nfsFileVersion: binary.LittleEndian.Uint32(b[0x64:0x68]), + version: binary.LittleEndian.Uint64(version), + inodeSize: binary.LittleEndian.Uint16(b[0x80:0x82]) + minInodeSize, + deletionTime: binary.LittleEndian.Uint32(b[0x14:0x18]), + accessTime: time.Unix(int64(accessTimeSeconds), int64(accessTimeNanoseconds)), + changeTime: time.Unix(int64(changeTimeSeconds), int64(changeTimeNanoseconds)), + modifyTime: time.Unix(int64(modifyTimeSeconds), int64(modifyTimeNanoseconds)), + createTime: time.Unix(int64(createTimeSeconds), int64(createTimeNanoseconds)), + extendedAttributeBlock: binary.LittleEndian.Uint64(extendedAttributeBlock), + project: binary.LittleEndian.Uint32(b[0x9c:0x100]), + extents: allExtents, + linkTarget: linkTarget, + } + checksum := binary.LittleEndian.Uint32(checksumBytes) + actualChecksum := inodeChecksum(b, sb.checksumSeed, number, i.nfsFileVersion) + + if actualChecksum != checksum { + return nil, fmt.Errorf("checksum mismatch, on-disk %x vs calculated %x", checksum, 
actualChecksum) + } + + return &i, nil +} + +// toBytes returns an inode ready to be written to disk +// +//nolint:unused // will be used in the future, not yet +func (i *inode) toBytes(sb *superblock) []byte { + iSize := sb.inodeSize + + b := make([]byte, iSize) + + mode := make([]byte, 2) + owner := make([]byte, 4) + fileSize := make([]byte, 8) + group := make([]byte, 4) + accessTime := make([]byte, 8) + changeTime := make([]byte, 8) + modifyTime := make([]byte, 8) + createTime := make([]byte, 8) + version := make([]byte, 8) + extendedAttributeBlock := make([]byte, 8) + + binary.LittleEndian.PutUint16(mode, i.permissionsGroup.toGroupInt()|i.permissionsOther.toOtherInt()|i.permissionsOwner.toOwnerInt()|uint16(i.fileType)) + binary.LittleEndian.PutUint32(owner, i.owner) + binary.LittleEndian.PutUint32(group, i.group) + binary.LittleEndian.PutUint64(fileSize, i.size) + binary.LittleEndian.PutUint64(version, i.version) + binary.LittleEndian.PutUint64(extendedAttributeBlock, i.extendedAttributeBlock) + + // there is some odd stuff that ext4 does with nanoseconds. We might need this in the future. 
+ // See https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Timestamps + // binary.LittleEndian.PutUint32(accessTime[4:8], (i.accessTimeNanoseconds<<2)&accessTime[4]) + binary.LittleEndian.PutUint64(accessTime, uint64(i.accessTime.Unix())) + binary.LittleEndian.PutUint32(accessTime[4:8], uint32(i.accessTime.Nanosecond())) + binary.LittleEndian.PutUint64(createTime, uint64(i.createTime.Unix())) + binary.LittleEndian.PutUint32(createTime[4:8], uint32(i.createTime.Nanosecond())) + binary.LittleEndian.PutUint64(changeTime, uint64(i.changeTime.Unix())) + binary.LittleEndian.PutUint32(changeTime[4:8], uint32(i.changeTime.Nanosecond())) + binary.LittleEndian.PutUint64(modifyTime, uint64(i.modifyTime.Unix())) + binary.LittleEndian.PutUint32(modifyTime[4:8], uint32(i.modifyTime.Nanosecond())) + + blocks := make([]byte, 8) + binary.LittleEndian.PutUint64(blocks, i.blocks) + + copy(b[0x0:0x2], mode) + copy(b[0x2:0x4], owner[0:2]) + copy(b[0x4:0x8], fileSize[0:4]) + copy(b[0x8:0xc], accessTime[0:4]) + copy(b[0xc:0x10], changeTime[0:4]) + copy(b[0x10:0x14], modifyTime[0:4]) + + binary.LittleEndian.PutUint32(b[0x14:0x18], i.deletionTime) + copy(b[0x18:0x1a], group[0:2]) + binary.LittleEndian.PutUint16(b[0x1a:0x1c], i.hardLinks) + copy(b[0x1c:0x20], blocks[0:4]) + binary.LittleEndian.PutUint32(b[0x20:0x24], i.flags.toInt()) + copy(b[0x24:0x28], version[0:4]) + copy(b[0x28:0x64], i.extents.toBytes()) + binary.LittleEndian.PutUint32(b[0x64:0x68], i.nfsFileVersion) + copy(b[0x68:0x6c], extendedAttributeBlock[0:4]) + copy(b[0x6c:0x70], fileSize[4:8]) + // b[0x70:0x74] is obsolete + copy(b[0x74:0x76], blocks[4:8]) + copy(b[0x76:0x78], extendedAttributeBlock[4:6]) + copy(b[0x78:0x7a], owner[2:4]) + copy(b[0x7a:0x7c], group[2:4]) + // b[0x7c:0x7e] is for checkeum + // b[0x7e:0x80] is unused + binary.LittleEndian.PutUint16(b[0x80:0x82], i.inodeSize-minInodeSize) + // b[0x82:0x84] is for checkeum + copy(b[0x84:0x88], changeTime[4:8]) + copy(b[0x88:0x8c], modifyTime[4:8]) + 
copy(b[0x8c:0x90], accessTime[4:8]) + copy(b[0x90:0x94], createTime[0:4]) + copy(b[0x94:0x98], createTime[4:8]) + + actualChecksum := inodeChecksum(b, sb.checksumSeed, i.number, i.nfsFileVersion) + checksum := make([]byte, 4) + binary.LittleEndian.PutUint32(checksum, actualChecksum) + copy(b[0x7c:0x7e], checksum[0:2]) + copy(b[0x82:0x84], checksum[2:4]) + + return b +} + +func parseOwnerPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsOwnerExecute == filePermissionsOwnerExecute, + write: mode&filePermissionsOwnerWrite == filePermissionsOwnerWrite, + read: mode&filePermissionsOwnerRead == filePermissionsOwnerRead, + } +} +func parseGroupPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsGroupExecute == filePermissionsGroupExecute, + write: mode&filePermissionsGroupWrite == filePermissionsGroupWrite, + read: mode&filePermissionsGroupRead == filePermissionsGroupRead, + } +} +func parseOtherPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsOtherExecute == filePermissionsOtherExecute, + write: mode&filePermissionsOtherWrite == filePermissionsOtherWrite, + read: mode&filePermissionsOtherRead == filePermissionsOtherRead, + } +} + +//nolint:unused // will be used in the future, not yet +func (fp *filePermissions) toOwnerInt() uint16 { + var mode uint16 + if fp.execute { + mode |= filePermissionsOwnerExecute + } + if fp.write { + mode |= filePermissionsOwnerWrite + } + if fp.read { + mode |= filePermissionsOwnerRead + } + return mode +} + +//nolint:unused // will be used in the future, not yet +func (fp *filePermissions) toOtherInt() uint16 { + var mode uint16 + if fp.execute { + mode |= filePermissionsOtherExecute + } + if fp.write { + mode |= filePermissionsOtherWrite + } + if fp.read { + mode |= filePermissionsOtherRead + } + return mode +} + +//nolint:unused // will be used in the future, not yet +func (fp 
*filePermissions) toGroupInt() uint16 { + var mode uint16 + if fp.execute { + mode |= filePermissionsGroupExecute + } + if fp.write { + mode |= filePermissionsGroupWrite + } + if fp.read { + mode |= filePermissionsGroupRead + } + return mode +} + +// parseFileType from the uint16 mode. The mode is built of bottom 12 bits +// being "any of" several permissions, and thus resolved via AND, +// while the top 4 bits are "only one of" several types, and thus resolved via just equal. +func parseFileType(mode uint16) fileType { + return fileType(mode & 0xF000) +} + +func parseInodeFlags(flags uint32) inodeFlags { + return inodeFlags{ + secureDeletion: inodeFlagSecureDeletion.included(flags), + preserveForUndeletion: inodeFlagPreserveForUndeletion.included(flags), + compressed: inodeFlagCompressed.included(flags), + synchronous: inodeFlagSynchronous.included(flags), + immutable: inodeFlagImmutable.included(flags), + appendOnly: inodeFlagAppendOnly.included(flags), + noDump: inodeFlagNoDump.included(flags), + noAccessTimeUpdate: inodeFlagNoAccessTimeUpdate.included(flags), + dirtyCompressed: inodeFlagDirtyCompressed.included(flags), + compressedClusters: inodeFlagCompressedClusters.included(flags), + noCompress: inodeFlagNoCompress.included(flags), + encryptedInode: inodeFlagEncryptedInode.included(flags), + hashedDirectoryIndexes: inodeFlagHashedDirectoryIndexes.included(flags), + AFSMagicDirectory: inodeFlagAFSMagicDirectory.included(flags), + alwaysJournal: inodeFlagAlwaysJournal.included(flags), + noMergeTail: inodeFlagNoMergeTail.included(flags), + syncDirectoryData: inodeFlagSyncDirectoryData.included(flags), + topDirectory: inodeFlagTopDirectory.included(flags), + hugeFile: inodeFlagHugeFile.included(flags), + usesExtents: inodeFlagUsesExtents.included(flags), + extendedAttributes: inodeFlagExtendedAttributes.included(flags), + blocksPastEOF: inodeFlagBlocksPastEOF.included(flags), + snapshot: inodeFlagSnapshot.included(flags), + deletingSnapshot: 
inodeFlagDeletingSnapshot.included(flags), + completedSnapshotShrink: inodeFlagCompletedSnapshotShrink.included(flags), + inlineData: inodeFlagInlineData.included(flags), + inheritProject: inodeFlagInheritProject.included(flags), + } +} + +//nolint:unused // will be used in the future, not yet +func (i *inodeFlags) toInt() uint32 { + var flags uint32 + + if i.secureDeletion { + flags |= uint32(inodeFlagSecureDeletion) + } + if i.preserveForUndeletion { + flags |= uint32(inodeFlagPreserveForUndeletion) + } + if i.compressed { + flags |= uint32(inodeFlagCompressed) + } + if i.synchronous { + flags |= uint32(inodeFlagSynchronous) + } + if i.immutable { + flags |= uint32(inodeFlagImmutable) + } + if i.appendOnly { + flags |= uint32(inodeFlagAppendOnly) + } + if i.noDump { + flags |= uint32(inodeFlagNoDump) + } + if i.noAccessTimeUpdate { + flags |= uint32(inodeFlagNoAccessTimeUpdate) + } + if i.dirtyCompressed { + flags |= uint32(inodeFlagDirtyCompressed) + } + if i.compressedClusters { + flags |= uint32(inodeFlagCompressedClusters) + } + if i.noCompress { + flags |= uint32(inodeFlagNoCompress) + } + if i.encryptedInode { + flags |= uint32(inodeFlagEncryptedInode) + } + if i.hashedDirectoryIndexes { + flags |= uint32(inodeFlagHashedDirectoryIndexes) + } + if i.AFSMagicDirectory { + flags |= uint32(inodeFlagAFSMagicDirectory) + } + if i.alwaysJournal { + flags |= uint32(inodeFlagAlwaysJournal) + } + if i.noMergeTail { + flags |= uint32(inodeFlagNoMergeTail) + } + if i.syncDirectoryData { + flags |= uint32(inodeFlagSyncDirectoryData) + } + if i.topDirectory { + flags |= uint32(inodeFlagTopDirectory) + } + if i.hugeFile { + flags |= uint32(inodeFlagHugeFile) + } + if i.usesExtents { + flags |= uint32(inodeFlagUsesExtents) + } + if i.extendedAttributes { + flags |= uint32(inodeFlagExtendedAttributes) + } + if i.blocksPastEOF { + flags |= uint32(inodeFlagBlocksPastEOF) + } + if i.snapshot { + flags |= uint32(inodeFlagSnapshot) + } + if i.deletingSnapshot { + flags |= 
uint32(inodeFlagDeletingSnapshot) + } + if i.completedSnapshotShrink { + flags |= uint32(inodeFlagCompletedSnapshotShrink) + } + if i.inlineData { + flags |= uint32(inodeFlagInlineData) + } + if i.inheritProject { + flags |= uint32(inodeFlagInheritProject) + } + + return flags +} + +// inodeChecksum calculate the checksum for an inode +func inodeChecksum(b []byte, checksumSeed, inodeNumber, inodeGeneration uint32) uint32 { + numberBytes := make([]byte, 4) + binary.LittleEndian.PutUint32(numberBytes, inodeNumber) + crcResult := crc.CRC32c(checksumSeed, numberBytes) + genBytes := make([]byte, 4) + binary.LittleEndian.PutUint32(genBytes, inodeGeneration) + crcResult = crc.CRC32c(crcResult, genBytes) + checksum := crc.CRC32c(crcResult, b) + return checksum +} diff --git a/filesystem/ext4/journaldevice_other.go b/filesystem/ext4/journaldevice_other.go new file mode 100644 index 00000000..09a61488 --- /dev/null +++ b/filesystem/ext4/journaldevice_other.go @@ -0,0 +1,12 @@ +//go:build !linux && !unix && !darwin && !windows + +package ext4 + +import ( + "fmt" + "runtime" +) + +func journalDevice(devicePath string) (deviceNumber uint32, err error) { + return 0, fmt.Errorf("external journal device unsupported on filesystem %s", runtime.GOOS) +} diff --git a/filesystem/ext4/journaldevice_shared.go b/filesystem/ext4/journaldevice_shared.go new file mode 100644 index 00000000..00a91da9 --- /dev/null +++ b/filesystem/ext4/journaldevice_shared.go @@ -0,0 +1,40 @@ +//go:build linux || unix || freebsd || netbsd || openbsd || darwin + +package ext4 + +import ( + "fmt" + "math" + + "golang.org/x/sys/unix" +) + +func journalDevice(devicePath string) (deviceNumber uint32, err error) { + // Use unix.Stat to get file status + var stat unix.Stat_t + err = unix.Stat(devicePath, &stat) + if err != nil { + return deviceNumber, err + } + + // Extract major and minor device numbers + //nolint:unconvert,nolintlint // lint stumbles on this, thinks it is an unnecessary conversion, which is true + 
// rotateLeft rotates a 32-bit integer to the left by s bits.
// Go defines shifts by >= 32 as producing 0, so s of 0 or 32 returns x unchanged.
func rotateLeft(x uint32, s uint) uint32 {
	return (x << s) | (x >> (32 - s))
}

// f is the MD4 round-1 auxiliary function: selects bits of y or z by x.
func f(x, y, z uint32) uint32 {
	return z ^ (x & (y ^ z))
}

// g is the MD4 round-2 auxiliary function (majority-style mix).
func g(x, y, z uint32) uint32 {
	return (x & y) + ((x ^ y) & z)
}

// h is the MD4 round-3 auxiliary function (parity).
func h(x, y, z uint32) uint32 {
	return x ^ y ^ z
}

// MD4 round constants
const (
	k1 uint32 = 0
	k2 uint32 = 0x5A827999
	k3 uint32 = 0x6ED9EBA1
)

// round applies one MD4 step: rotate (a + f(b,c,d) + x) left by s.
func round(f func(uint32, uint32, uint32) uint32, a, b, c, d, x uint32, s uint) uint32 {
	return rotateLeft(a+f(b, c, d)+x, s)
}

// HalfMD4Transform is a basic cut-down MD4 transform. It mixes the first 8
// words of in into the state buf (passed by value, so the caller's buffer is
// untouched) and returns only 32 bits of the result (buf[1]).
// in must contain at least 8 elements.
func HalfMD4Transform(buf [4]uint32, in []uint32) uint32 {
	var a, b, c, d = buf[0], buf[1], buf[2], buf[3]

	/* Round 1 */
	a = round(f, a, b, c, d, in[0]+k1, 3)
	d = round(f, d, a, b, c, in[1]+k1, 7)
	c = round(f, c, d, a, b, in[2]+k1, 11)
	b = round(f, b, c, d, a, in[3]+k1, 19)
	a = round(f, a, b, c, d, in[4]+k1, 3)
	d = round(f, d, a, b, c, in[5]+k1, 7)
	c = round(f, c, d, a, b, in[6]+k1, 11)
	b = round(f, b, c, d, a, in[7]+k1, 19)

	/* Round 2 */
	a = round(g, a, b, c, d, in[1]+k2, 3)
	d = round(g, d, a, b, c, in[3]+k2, 5)
	c = round(g, c, d, a, b, in[5]+k2, 9)
	b = round(g, b, c, d, a, in[7]+k2, 13)
	a = round(g, a, b, c, d, in[0]+k2, 3)
	d = round(g, d, a, b, c, in[2]+k2, 5)
	c = round(g, c, d, a, b, in[4]+k2, 9)
	b = round(g, b, c, d, a, in[6]+k2, 13)

	/* Round 3 */
	a = round(h, a, b, c, d, in[3]+k3, 3)
	d = round(h, d, a, b, c, in[7]+k3, 9)
	c = round(h, c, d, a, b, in[2]+k3, 11)
	b = round(h, b, c, d, a, in[6]+k3, 15)
	a = round(h, a, b, c, d, in[1]+k3, 3)
	d = round(h, d, a, b, c, in[5]+k3, 9)
	c = round(h, c, d, a, b, in[0]+k3, 11)
	b = round(h, b, c, d, a, in[4]+k3, 15)

	buf[0] += a
	buf[1] += b
	buf[2] += c
	buf[3] += d

	return buf[1]
}
expect: 0x56781234}, + {x: 0x12345678, s: 32, expect: 0x12345678}, + } + + for _, tt := range tests { + result := rotateLeft(tt.x, tt.s) + if result != tt.expect { + t.Errorf("rotateLeft(%#x, %d) = %#x; want %#x", tt.x, tt.s, result, tt.expect) + } + } +} + +// Test f function +func TestF(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xAAAAAAAA}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x55555555}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x1ffddff1}, + } + + for _, tt := range tests { + result := f(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("f(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test g function +func TestG(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xffffffff}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x0}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x1abcdef8}, + } + + for _, tt := range tests { + result := g(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("g(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test h function +func TestH(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x0}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xFFFFFFFF}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x87654321}, + } + + for _, tt := range tests { + result := h(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("h(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test round function +func TestRound(t *testing.T) { + tests := []struct { + name string + f func(x, y, z uint32) uint32 + a, b, c, d uint32 + x uint32 + s uint + expect uint32 + }{ + {"f", f, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 3, 
0x91a2b3b8}, + {"g", g, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 5, 0x468acee2}, + {"h", h, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 7, 0x5f4e3d70}, + } + + for _, tt := range tests { + a, b, c, d := tt.a, tt.b, tt.c, tt.d + result := round(tt.f, a, b, c, d, tt.x, tt.s) + if result != tt.expect { + t.Errorf("round(%s, %d) = %#x; want %#x", tt.name, tt.s, result, tt.expect) + } + } +} + +func TestHalfMD4Transform(t *testing.T) { + var buf = [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476} + tests := []struct { + name string + in [8]uint32 + expect uint32 + }{ + { + name: "Test Case 1", + in: [8]uint32{0, 1, 2, 3, 4, 5, 6, 7}, + expect: 0xF254F422, + }, + { + name: "Test Case 2", + in: [8]uint32{0x12345678, 0x9ABCDEF0, 0x0FEDCBA9, 0x87654321, 0x11223344, 0xAABBCCDD, 0x55667788, 0x99AABBCC}, + expect: 0xA4405E22, + }, + { + name: "Test Case 3", + in: [8]uint32{0x00000000, 0xFFFFFFFF, 0xAAAAAAAA, 0x55555555, 0x33333333, 0x66666666, 0x99999999, 0xCCCCCCCC}, + expect: 0x35B92DEF, + }, + { + name: "Test Case 4 (Empty Input)", + in: [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}, + expect: 0x5B0AA4BE, + }, + { + name: "Test Case 5 (Random Input)", + in: [8]uint32{0x89ABCDEF, 0x01234567, 0xFEDCBA98, 0x76543210, 0xA1B2C3D4, 0x0BADC0DE, 0xDEADBEEF, 0xCAFEBABE}, + expect: 0x2748FDB6, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HalfMD4Transform(buf, tt.in[:]) + if result != tt.expect { + t.Errorf("halfMD4Transform(%#v, %#v) = %#x; want %#x", buf, tt.in, result, tt.expect) + } + }) + } +} diff --git a/filesystem/ext4/miscflags.go b/filesystem/ext4/miscflags.go new file mode 100644 index 00000000..d2a22368 --- /dev/null +++ b/filesystem/ext4/miscflags.go @@ -0,0 +1,34 @@ +package ext4 + +// miscFlags is a structure holding various miscellaneous flags +type miscFlags struct { + signedDirectoryHash bool + unsignedDirectoryHash bool + developmentTest bool +} + +func parseMiscFlags(flags uint32) miscFlags 
const (
	// default mount options, as stored in the superblock s_default_mount_opts field
	mountPrintDebugInfo                 mountOption = 0x1
	mountNewFilesGIDContainingDirectory mountOption = 0x2
	mountUserspaceExtendedAttributes    mountOption = 0x4
	mountPosixACLs                      mountOption = 0x8
	mount16BitUIDs                      mountOption = 0x10
	mountJournalDataAndMetadata         mountOption = 0x20
	mountFlushBeforeJournal             mountOption = 0x40
	mountUnorderingDataMetadata         mountOption = 0x60
	mountDisableWriteFlushes            mountOption = 0x100
	mountTrackMetadataBlocks            mountOption = 0x200
	mountDiscardDeviceSupport           mountOption = 0x400
	mountDisableDelayedAllocation       mountOption = 0x800
)

// mountOptions is a structure holding which default mount options are set
type mountOptions struct {
	printDebugInfo                 bool
	newFilesGIDContainingDirectory bool
	userspaceExtendedAttributes    bool
	posixACLs                      bool
	use16BitUIDs                   bool
	journalDataAndMetadata         bool
	flushBeforeJournal             bool
	unorderingDataMetadata         bool
	disableWriteFlushes            bool
	trackMetadataBlocks            bool
	discardDeviceSupport           bool
	disableDelayedAllocation       bool
}

// mountOption is a single default-mount-option bit (or bit combination).
type mountOption uint32

// included reports whether every bit of m is set in a.
func (m mountOption) included(a uint32) bool {
	return a&uint32(m) == uint32(m)
}

// MountOpt is a functional option for setting default mount options.
type MountOpt func(*mountOptions)

// WithDefaultMountOptionPrintDebuggingInfo sets whether debugging info is printed on mount.
func WithDefaultMountOptionPrintDebuggingInfo(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.printDebugInfo = enable }
}

// WithDefaultMountOptionGIDFromDirectory sets whether new files take the GID of their directory.
func WithDefaultMountOptionGIDFromDirectory(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.newFilesGIDContainingDirectory = enable }
}

// WithDefaultMountOptionUserspaceXattrs sets whether userspace extended attributes are enabled.
func WithDefaultMountOptionUserspaceXattrs(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.userspaceExtendedAttributes = enable }
}

// WithDefaultMountOptionPOSIXACLs sets whether POSIX ACLs are enabled.
func WithDefaultMountOptionPOSIXACLs(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.posixACLs = enable }
}

// WithDefaultMountOptionUID16Bit sets whether 16-bit UIDs are used.
func WithDefaultMountOptionUID16Bit(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.use16BitUIDs = enable }
}

// WithDefaultMountOptionJournalModeData sets journalling of both data and metadata.
func WithDefaultMountOptionJournalModeData(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.journalDataAndMetadata = enable }
}

// WithDefaultMountOptionJournalModeOrdered sets flushing of data before journalling metadata.
func WithDefaultMountOptionJournalModeOrdered(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.flushBeforeJournal = enable }
}

// WithDefaultMountOptionJournalModeWriteback sets unordered data/metadata journalling.
func WithDefaultMountOptionJournalModeWriteback(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.unorderingDataMetadata = enable }
}

// WithDefaultMountOptionDisableWriteFlushes sets whether write flushes are disabled.
func WithDefaultMountOptionDisableWriteFlushes(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.disableWriteFlushes = enable }
}

// WithDefaultMountOptionBlockValidity sets whether metadata block tracking is enabled.
func WithDefaultMountOptionBlockValidity(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.trackMetadataBlocks = enable }
}

// WithDefaultMountOptionDiscardSupport sets whether device discard support is enabled.
func WithDefaultMountOptionDiscardSupport(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.discardDeviceSupport = enable }
}

// WithDefaultMountOptionDisableDelayedAllocation sets whether delayed allocation is disabled.
func WithDefaultMountOptionDisableDelayedAllocation(enable bool) MountOpt {
	return func(opts *mountOptions) { opts.disableDelayedAllocation = enable }
}

// defaultMountOptionsFromOpts applies each MountOpt in order to a fresh mountOptions.
func defaultMountOptionsFromOpts(opts []MountOpt) *mountOptions {
	result := &mountOptions{}
	for _, apply := range opts {
		apply(result)
	}
	return result
}

// parseMountOptions expands the default-mount-options bitmask into a struct.
func parseMountOptions(flags uint32) mountOptions {
	return mountOptions{
		printDebugInfo:                 mountPrintDebugInfo.included(flags),
		newFilesGIDContainingDirectory: mountNewFilesGIDContainingDirectory.included(flags),
		userspaceExtendedAttributes:    mountUserspaceExtendedAttributes.included(flags),
		posixACLs:                      mountPosixACLs.included(flags),
		use16BitUIDs:                   mount16BitUIDs.included(flags),
		journalDataAndMetadata:         mountJournalDataAndMetadata.included(flags),
		flushBeforeJournal:             mountFlushBeforeJournal.included(flags),
		unorderingDataMetadata:         mountUnorderingDataMetadata.included(flags),
		disableWriteFlushes:            mountDisableWriteFlushes.included(flags),
		trackMetadataBlocks:            mountTrackMetadataBlocks.included(flags),
		discardDeviceSupport:           mountDiscardDeviceSupport.included(flags),
		disableDelayedAllocation:       mountDisableDelayedAllocation.included(flags),
	}
}

// toInt collapses the mountOptions struct back into the on-disk bitmask.
func (m *mountOptions) toInt() uint32 {
	var flags uint32
	for _, entry := range []struct {
		set    bool
		option mountOption
	}{
		{m.printDebugInfo, mountPrintDebugInfo},
		{m.newFilesGIDContainingDirectory, mountNewFilesGIDContainingDirectory},
		{m.userspaceExtendedAttributes, mountUserspaceExtendedAttributes},
		{m.posixACLs, mountPosixACLs},
		{m.use16BitUIDs, mount16BitUIDs},
		{m.journalDataAndMetadata, mountJournalDataAndMetadata},
		{m.flushBeforeJournal, mountFlushBeforeJournal},
		{m.unorderingDataMetadata, mountUnorderingDataMetadata},
		{m.disableWriteFlushes, mountDisableWriteFlushes},
		{m.trackMetadataBlocks, mountTrackMetadataBlocks},
		{m.discardDeviceSupport, mountDiscardDeviceSupport},
		{m.disableDelayedAllocation, mountDisableDelayedAllocation},
	} {
		if entry.set {
			flags |= uint32(entry.option)
		}
	}
	return flags
}
@@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "math" + "reflect" + "sort" + "time" + + "github.com/diskfs/go-diskfs/filesystem/ext4/crc" + "github.com/diskfs/go-diskfs/util" + uuid "github.com/satori/go.uuid" +) + +type filesystemState uint16 +type errorBehaviour uint16 +type osFlag uint32 +type feature uint32 +type hashAlgorithm byte +type flag uint32 +type encryptionAlgorithm byte + +func (f feature) included(a uint32) bool { + return a&uint32(f) == uint32(f) +} + +//nolint:unused // we know this is unused, but it will be needed in future +func (f flag) equal(a flag) bool { + return f == a +} +func (f flag) included(a uint32) bool { + return a&uint32(f) == uint32(f) +} + +const ( + // superblockSignature is the signature for every superblock + superblockSignature uint16 = 0xef53 + // optional states for the filesystem + fsStateCleanlyUnmounted filesystemState = 0x0001 + fsStateErrors filesystemState = 0x0002 + fsStateOrphansRecovered filesystemState = 0x0004 + // how to handle erorrs + errorsContinue errorBehaviour = 1 + errorsRemountReadOnly errorBehaviour = 2 + errorsPanic errorBehaviour = 3 + // checksum type + checkSumTypeCRC32c byte = 1 + // oses + osLinux osFlag = 0 + osHurd osFlag = 1 + osMasix osFlag = 2 + osFreeBSD osFlag = 3 + osLites osFlag = 4 + // compatible, incompatible, and compatibleReadOnly feature flags + compatFeatureDirectoryPreAllocate feature = 0x1 + compatFeatureImagicInodes feature = 0x2 + compatFeatureHasJournal feature = 0x4 + compatFeatureExtendedAttributes feature = 0x8 + compatFeatureReservedGDTBlocksForExpansion feature = 0x10 + compatFeatureDirectoryIndices feature = 0x20 + compatFeatureLazyBlockGroup feature = 0x40 + compatFeatureExcludeInode feature = 0x80 + compatFeatureExcludeBitmap feature = 0x100 + compatFeatureSparseSuperBlockV2 feature = 0x200 + compatFeatureFastCommit feature = 0x400 + compatFeatureStableInodes feature = 0x800 + compatFeatureOrphanFile feature = 0x1000 + incompatFeatureCompression feature = 
0x1 + incompatFeatureDirectoryEntriesRecordFileType feature = 0x2 + incompatFeatureRecoveryNeeded feature = 0x4 + incompatFeatureSeparateJournalDevice feature = 0x8 + incompatFeatureMetaBlockGroups feature = 0x10 + incompatFeatureExtents feature = 0x40 + incompatFeature64Bit feature = 0x80 + incompatFeatureMultipleMountProtection feature = 0x100 + incompatFeatureFlexBlockGroups feature = 0x200 + incompatFeatureExtendedAttributeInodes feature = 0x400 + incompatFeatureDataInDirectoryEntries feature = 0x1000 + incompatFeatureMetadataChecksumSeedInSuperblock feature = 0x2000 + incompatFeatureLargeDirectory feature = 0x4000 + incompatFeatureDataInInode feature = 0x8000 + incompatFeatureEncryptInodes feature = 0x10000 + roCompatFeatureSparseSuperblock feature = 0x1 + roCompatFeatureLargeFile feature = 0x2 + roCompatFeatureBtreeDirectory feature = 0x4 + roCompatFeatureHugeFile feature = 0x8 + roCompatFeatureGDTChecksum feature = 0x10 + roCompatFeatureLargeSubdirectoryCount feature = 0x20 + roCompatFeatureLargeInodes feature = 0x40 + roCompatFeatureSnapshot feature = 0x80 + roCompatFeatureQuota feature = 0x100 + roCompatFeatureBigalloc feature = 0x200 + roCompatFeatureMetadataChecksums feature = 0x400 + roCompatFeatureReplicas feature = 0x800 + roCompatFeatureReadOnly feature = 0x1000 + roCompatFeatureProjectQuotas feature = 0x2000 + // hash algorithms for htree directory entries + hashLegacy hashAlgorithm = 0x0 + hashHalfMD4 hashAlgorithm = 0x1 + hashTea hashAlgorithm = 0x2 + hashLegacyUnsigned hashAlgorithm = 0x3 + hashHalfMD4Unsigned hashAlgorithm = 0x4 + hashTeaUnsigned hashAlgorithm = 0x5 + // miscellaneous flags + flagSignedDirectoryHash flag = 0x0001 + flagUnsignedDirectoryHash flag = 0x0002 + flagTestDevCode flag = 0x0004 + // encryption algorithms + //nolint:unused // we know these are unused, but they will be needed in the future + encryptionAlgorithmInvalid encryptionAlgorithm = 0 + encryptionAlgorithm256AESXTS encryptionAlgorithm = 1 + 
encryptionAlgorithm256AESGCM encryptionAlgorithm = 2 + encryptionAlgorithm256AESCBC encryptionAlgorithm = 3 +) + +// journalBackup is a backup in the superblock of the journal's inode i_block[] array and size +type journalBackup struct { + iBlocks [15]uint32 + iSize uint64 +} + +// Superblock is a structure holding the ext4 superblock +type superblock struct { + inodeCount uint32 + blockCount uint64 + reservedBlocks uint64 + freeBlocks uint64 + freeInodes uint32 + firstDataBlock uint32 + blockSize uint32 + clusterSize uint64 + blocksPerGroup uint32 + clustersPerGroup uint32 + inodesPerGroup uint32 + mountTime time.Time + writeTime time.Time + mountCount uint16 + mountsToFsck uint16 + filesystemState filesystemState + errorBehaviour errorBehaviour + minorRevision uint16 + lastCheck time.Time + checkInterval uint32 + creatorOS osFlag + revisionLevel uint32 + reservedBlocksDefaultUID uint16 + reservedBlocksDefaultGID uint16 + firstNonReservedInode uint32 + inodeSize uint16 + blockGroup uint16 + features featureFlags + uuid *uuid.UUID + volumeLabel string + lastMountedDirectory string + algorithmUsageBitmap uint32 + preallocationBlocks byte + preallocationDirectoryBlocks byte + reservedGDTBlocks uint16 + journalSuperblockUUID *uuid.UUID + journalInode uint32 + journalDeviceNumber uint32 + orphanedInodesStart uint32 + hashTreeSeed []uint32 + hashVersion hashAlgorithm + groupDescriptorSize uint16 + defaultMountOptions mountOptions + firstMetablockGroup uint32 + mkfsTime time.Time + journalBackup *journalBackup + // 64-bit mode features + inodeMinBytes uint16 + inodeReserveBytes uint16 + miscFlags miscFlags + raidStride uint16 + multiMountPreventionInterval uint16 + multiMountProtectionBlock uint64 + raidStripeWidth uint32 + logGroupsPerFlex uint64 + checksumType byte + totalKBWritten uint64 + snapshotInodeNumber uint32 + snapshotID uint32 + snapshotReservedBlocks uint64 + snapshotStartInode uint32 + errorCount uint32 + errorFirstTime time.Time + errorFirstInode uint32 + 
errorFirstBlock uint64 + errorFirstFunction string + errorFirstLine uint32 + errorLastTime time.Time + errorLastInode uint32 + errorLastLine uint32 + errorLastBlock uint64 + errorLastFunction string + errorFirstCode byte + errorLastCode byte + mountOptions string + userQuotaInode uint32 + groupQuotaInode uint32 + overheadBlocks uint32 + backupSuperblockBlockGroups [2]uint32 + encryptionAlgorithms [4]encryptionAlgorithm + encryptionSalt [16]byte + lostFoundInode uint32 + projectQuotaInode uint32 + checksumSeed uint32 + // encoding + filenameCharsetEncoding uint16 + filenameCharsetEncodingFlags uint16 + // inode for tracking orphaned inodes + orphanedInodeInodeNumber uint32 +} + +func (sb *superblock) equal(o *superblock) bool { + if (sb == nil && o != nil) || (o == nil && sb != nil) { + return false + } + if sb == nil && o == nil { + return true + } + return reflect.DeepEqual(sb, o) +} + +// FSInformationSectorFromBytes create an FSInformationSector struct from bytes +func superblockFromBytes(b []byte) (*superblock, error) { + bLen := len(b) + if bLen != int(SuperblockSize) { + return nil, fmt.Errorf("cannot read superblock from %d bytes instead of expected %d", bLen, SuperblockSize) + } + + // check the magic signature + actualSignature := binary.LittleEndian.Uint16(b[0x38:0x3a]) + if actualSignature != superblockSignature { + return nil, fmt.Errorf("erroneous signature at location 0x38 was %x instead of expected %x", actualSignature, superblockSignature) + } + + sb := superblock{} + + // first read feature flags of various types + compatFlags := binary.LittleEndian.Uint32(b[0x5c:0x60]) + incompatFlags := binary.LittleEndian.Uint32(b[0x60:0x64]) + roCompatFlags := binary.LittleEndian.Uint32(b[0x64:0x68]) + // track which ones are set + sb.features = parseFeatureFlags(compatFlags, incompatFlags, roCompatFlags) + + sb.inodeCount = binary.LittleEndian.Uint32(b[0:4]) + + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + 
blockCount := make([]byte, 8) + reservedBlocks := make([]byte, 8) + freeBlocks := make([]byte, 8) + + copy(blockCount[0:4], b[0x4:0x8]) + copy(reservedBlocks[0:4], b[0x8:0xc]) + copy(freeBlocks[0:4], b[0xc:0x10]) + + if sb.features.fs64Bit { + copy(blockCount[4:8], b[0x150:0x154]) + copy(reservedBlocks[4:8], b[0x154:0x158]) + copy(freeBlocks[4:8], b[0x158:0x15c]) + } + sb.blockCount = binary.LittleEndian.Uint64(blockCount) + sb.reservedBlocks = binary.LittleEndian.Uint64(reservedBlocks) + sb.freeBlocks = binary.LittleEndian.Uint64(freeBlocks) + + sb.freeInodes = binary.LittleEndian.Uint32(b[0x10:0x14]) + sb.firstDataBlock = binary.LittleEndian.Uint32(b[0x14:0x18]) + sb.blockSize = uint32(math.Exp2(float64(10 + binary.LittleEndian.Uint32(b[0x18:0x1c])))) + sb.clusterSize = uint64(math.Exp2(float64(binary.LittleEndian.Uint32(b[0x1c:0x20])))) + sb.blocksPerGroup = binary.LittleEndian.Uint32(b[0x20:0x24]) + if sb.features.bigalloc { + sb.clustersPerGroup = binary.LittleEndian.Uint32(b[0x24:0x28]) + } + sb.inodesPerGroup = binary.LittleEndian.Uint32(b[0x28:0x2c]) + // these higher bits are listed as reserved in https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout + // but looking at the source to mke2fs, we see that some are used, see + // https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/lib/ext2fs/ext2_fs.h#n653 + // + // mount time has low 32 bits at 0x2c and high 8 bits at 0x274 + // write time has low 32 bits at 0x30 and high 8 bits at 0x275 + // mkfs time has low 32 bits at 0x108 and high 8 bits at 0x276 + // lastcheck time has low 32 bits at 0x40 and high 8 bits at 0x277 + // firsterror time has low 32 bits at 0x198 and high 8 bits at 0x278 + // lasterror time has low 32 bits at 0x1cc and high 8 bits at 0x279 + // firsterror code is 8 bits at 0x27a + // lasterror code is 8 bits at 0x27b + sb.mountTime = bytesToTime(b[0x2c:0x30], b[0x274:0x275]) + sb.writeTime = bytesToTime(b[0x30:0x34], b[0x275:0x276]) + sb.mkfsTime = bytesToTime(b[0x108:0x10c], 
b[0x276:0x277]) + sb.lastCheck = bytesToTime(b[0x40:0x44], b[0x277:0x278]) + sb.errorFirstTime = bytesToTime(b[0x198:0x19c], b[0x278:0x279]) + sb.errorLastTime = bytesToTime(b[0x1cc:0x1d0], b[0x279:0x27a]) + + sb.errorFirstCode = b[0x27a] + sb.errorLastCode = b[0x27b] + + sb.mountCount = binary.LittleEndian.Uint16(b[0x34:0x36]) + sb.mountsToFsck = binary.LittleEndian.Uint16(b[0x36:0x38]) + + sb.filesystemState = filesystemState(binary.LittleEndian.Uint16(b[0x3a:0x3c])) + sb.errorBehaviour = errorBehaviour(binary.LittleEndian.Uint16(b[0x3c:0x3e])) + + sb.minorRevision = binary.LittleEndian.Uint16(b[0x3e:0x40]) + sb.checkInterval = binary.LittleEndian.Uint32(b[0x44:0x48]) + + sb.creatorOS = osFlag(binary.LittleEndian.Uint32(b[0x48:0x4c])) + sb.revisionLevel = binary.LittleEndian.Uint32(b[0x4c:0x50]) + sb.reservedBlocksDefaultUID = binary.LittleEndian.Uint16(b[0x50:0x52]) + sb.reservedBlocksDefaultGID = binary.LittleEndian.Uint16(b[0x52:0x54]) + + sb.firstNonReservedInode = binary.LittleEndian.Uint32(b[0x54:0x58]) + sb.inodeSize = binary.LittleEndian.Uint16(b[0x58:0x5a]) + sb.blockGroup = binary.LittleEndian.Uint16(b[0x5a:0x5c]) + + voluuid, err := uuid.FromBytes(b[0x68:0x78]) + if err != nil { + return nil, fmt.Errorf("unable to read volume UUID: %v", err) + } + sb.uuid = &voluuid + sb.volumeLabel = minString(b[0x78:0x88]) + sb.lastMountedDirectory = minString(b[0x88:0xc8]) + sb.algorithmUsageBitmap = binary.LittleEndian.Uint32(b[0xc8:0xcc]) + + sb.preallocationBlocks = b[0xcc] + sb.preallocationDirectoryBlocks = b[0xcd] + sb.reservedGDTBlocks = binary.LittleEndian.Uint16(b[0xce:0xd0]) + + journaluuid, err := uuid.FromBytes(b[0xd0:0xe0]) + if err != nil { + return nil, fmt.Errorf("unable to read journal UUID: %v", err) + } + sb.journalSuperblockUUID = &journaluuid + sb.journalInode = binary.LittleEndian.Uint32(b[0xe0:0xe4]) + sb.journalDeviceNumber = binary.LittleEndian.Uint32(b[0xe4:0xe8]) + sb.orphanedInodesStart = binary.LittleEndian.Uint32(b[0xe8:0xec]) + + 
htreeSeed := make([]uint32, 0, 4) + htreeSeed = append(htreeSeed, + binary.LittleEndian.Uint32(b[0xec:0xf0]), + binary.LittleEndian.Uint32(b[0xf0:0xf4]), + binary.LittleEndian.Uint32(b[0xf4:0xf8]), + binary.LittleEndian.Uint32(b[0xf8:0xfc]), + ) + sb.hashTreeSeed = htreeSeed + + sb.hashVersion = hashAlgorithm(b[0xfc]) + + sb.groupDescriptorSize = binary.LittleEndian.Uint16(b[0xfe:0x100]) + + sb.defaultMountOptions = parseMountOptions(binary.LittleEndian.Uint32(b[0x100:0x104])) + sb.firstMetablockGroup = binary.LittleEndian.Uint32(b[0x104:0x108]) + + journalBackupType := b[0xfd] + if journalBackupType == 0 || journalBackupType == 1 { + journalBackupArray := [15]uint32{} + startJournalBackup := 0x10c + for i := 0; i < 15; i++ { + start := startJournalBackup + 4*i + end := startJournalBackup + 4*i + 4 + journalBackupArray[i] = binary.LittleEndian.Uint32(b[start:end]) + } + iSizeBytes := make([]byte, 8) + + copy(iSizeBytes[0:4], b[startJournalBackup+4*16:startJournalBackup+4*17]) + copy(iSizeBytes[4:8], b[startJournalBackup+4*15:startJournalBackup+4*16]) + + sb.journalBackup = &journalBackup{ + iSize: binary.LittleEndian.Uint64(iSizeBytes), + iBlocks: journalBackupArray, + } + } + + sb.inodeMinBytes = binary.LittleEndian.Uint16(b[0x15c:0x15e]) + sb.inodeReserveBytes = binary.LittleEndian.Uint16(b[0x15e:0x160]) + sb.miscFlags = parseMiscFlags(binary.LittleEndian.Uint32(b[0x160:0x164])) + + sb.raidStride = binary.LittleEndian.Uint16(b[0x164:0x166]) + sb.raidStripeWidth = binary.LittleEndian.Uint32(b[0x170:0x174]) + + sb.multiMountPreventionInterval = binary.LittleEndian.Uint16(b[0x166:0x168]) + sb.multiMountProtectionBlock = binary.LittleEndian.Uint64(b[0x168:0x170]) + + sb.logGroupsPerFlex = uint64(math.Exp2(float64(b[0x174]))) + + sb.checksumType = b[0x175] // only valid one is 1 + if sb.checksumType != checkSumTypeCRC32c { + return nil, fmt.Errorf("cannot read superblock: invalid checksum type %d, only valid is %d", sb.checksumType, checkSumTypeCRC32c) + } + + // 
b[0x176:0x178] are reserved padding + + sb.totalKBWritten = binary.LittleEndian.Uint64(b[0x178:0x180]) + + sb.snapshotInodeNumber = binary.LittleEndian.Uint32(b[0x180:0x184]) + sb.snapshotID = binary.LittleEndian.Uint32(b[0x184:0x188]) + sb.snapshotReservedBlocks = binary.LittleEndian.Uint64(b[0x188:0x190]) + sb.snapshotStartInode = binary.LittleEndian.Uint32(b[0x190:0x194]) + + // errors + sb.errorCount = binary.LittleEndian.Uint32(b[0x194:0x198]) + sb.errorFirstInode = binary.LittleEndian.Uint32(b[0x19c:0x1a0]) + sb.errorFirstBlock = binary.LittleEndian.Uint64(b[0x1a0:0x1a8]) + sb.errorFirstFunction = minString(b[0x1a8:0x1c8]) + sb.errorFirstLine = binary.LittleEndian.Uint32(b[0x1c8:0x1cc]) + sb.errorLastInode = binary.LittleEndian.Uint32(b[0x1d0:0x1d4]) + sb.errorLastLine = binary.LittleEndian.Uint32(b[0x1d4:0x1d8]) + sb.errorLastBlock = binary.LittleEndian.Uint64(b[0x1d8:0x1e0]) + sb.errorLastFunction = minString(b[0x1e0:0x200]) + + sb.mountOptions = minString(b[0x200:0x240]) + sb.userQuotaInode = binary.LittleEndian.Uint32(b[0x240:0x244]) + sb.groupQuotaInode = binary.LittleEndian.Uint32(b[0x244:0x248]) + // overheadBlocks *always* is 0 + sb.overheadBlocks = binary.LittleEndian.Uint32(b[0x248:0x24c]) + sb.backupSuperblockBlockGroups = [2]uint32{ + binary.LittleEndian.Uint32(b[0x24c:0x250]), + binary.LittleEndian.Uint32(b[0x250:0x254]), + } + for i := 0; i < 4; i++ { + sb.encryptionAlgorithms[i] = encryptionAlgorithm(b[0x254+i]) + } + for i := 0; i < 16; i++ { + sb.encryptionSalt[i] = b[0x258+i] + } + sb.lostFoundInode = binary.LittleEndian.Uint32(b[0x268:0x26c]) + sb.projectQuotaInode = binary.LittleEndian.Uint32(b[0x26c:0x270]) + + sb.checksumSeed = binary.LittleEndian.Uint32(b[0x270:0x274]) + // what if the seed is missing? It can be. 
+ if sb.features.metadataChecksums && sb.checksumSeed == 0 { + sb.checksumSeed = crc.CRC32c(0xffffffff, sb.uuid.Bytes()) + } + + sb.filenameCharsetEncoding = binary.LittleEndian.Uint16(b[0x27c:0x27e]) + sb.filenameCharsetEncodingFlags = binary.LittleEndian.Uint16(b[0x27e:0x280]) + sb.orphanedInodeInodeNumber = binary.LittleEndian.Uint32(b[0x280:0x284]) + + // b[0x288:0x3fc] are reserved for zero padding + + // checksum + checksum := binary.LittleEndian.Uint32(b[0x3fc:0x400]) + + // calculate the checksum and validate - we use crc32c + if sb.features.metadataChecksums { + actualChecksum := crc.CRC32c(0xffffffff, b[0:0x3fc]) + if actualChecksum != checksum { + return nil, fmt.Errorf("invalid superblock checksum, actual was %x, on disk was %x, inverted on disk was %x", actualChecksum, checksum, 0xffffffff-checksum) + } + } + + return &sb, nil +} + +// toBytes returns a superblock ready to be written to disk +func (sb *superblock) toBytes() ([]byte, error) { + b := make([]byte, SuperblockSize) + + binary.LittleEndian.PutUint16(b[0x38:0x3a], superblockSignature) + compatFlags, incompatFlags, roCompatFlags := sb.features.toInts() + binary.LittleEndian.PutUint32(b[0x5c:0x60], compatFlags) + binary.LittleEndian.PutUint32(b[0x60:0x64], incompatFlags) + binary.LittleEndian.PutUint32(b[0x64:0x68], roCompatFlags) + + binary.LittleEndian.PutUint32(b[0:4], sb.inodeCount) + + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + blockCount := make([]byte, 8) + reservedBlocks := make([]byte, 8) + freeBlocks := make([]byte, 8) + + binary.LittleEndian.PutUint64(blockCount, sb.blockCount) + binary.LittleEndian.PutUint64(reservedBlocks, sb.reservedBlocks) + binary.LittleEndian.PutUint64(freeBlocks, sb.freeBlocks) + + copy(b[0x4:0x8], blockCount[0:4]) + copy(b[0x8:0xc], reservedBlocks[0:4]) + copy(b[0xc:0x10], freeBlocks[0:4]) + + if sb.features.fs64Bit { + copy(b[0x150:0x154], blockCount[4:8]) + copy(b[0x154:0x158], reservedBlocks[4:8]) + 
copy(b[0x158:0x15c], freeBlocks[4:8]) + } + + binary.LittleEndian.PutUint32(b[0x10:0x14], sb.freeInodes) + binary.LittleEndian.PutUint32(b[0x14:0x18], sb.firstDataBlock) + binary.LittleEndian.PutUint32(b[0x18:0x1c], uint32(math.Log2(float64(sb.blockSize))-10)) + binary.LittleEndian.PutUint32(b[0x1c:0x20], uint32(math.Log2(float64(sb.clusterSize)))) + + binary.LittleEndian.PutUint32(b[0x20:0x24], sb.blocksPerGroup) + if sb.features.bigalloc { + binary.LittleEndian.PutUint32(b[0x24:0x28], sb.clustersPerGroup) + } else { + binary.LittleEndian.PutUint32(b[0x24:0x28], sb.blocksPerGroup) + } + binary.LittleEndian.PutUint32(b[0x28:0x2c], sb.inodesPerGroup) + mountTime := timeToBytes(sb.mountTime) + writeTime := timeToBytes(sb.writeTime) + mkfsTime := timeToBytes(sb.mkfsTime) + lastCheck := timeToBytes(sb.lastCheck) + errorFirstTime := timeToBytes(sb.errorFirstTime) + errorLastTime := timeToBytes(sb.errorLastTime) + + // mount time low bits, high bit + copy(b[0x2c:0x30], mountTime[0:4]) + b[0x274] = mountTime[4] + // write time low bits, high bit + copy(b[0x30:0x34], writeTime[0:4]) + b[0x275] = writeTime[4] + // mkfs time low bits, high bit + copy(b[0x108:0x10c], mkfsTime[0:4]) + b[0x276] = mkfsTime[4] + // last check time low bits, high bit + copy(b[0x40:0x44], lastCheck[0:4]) + b[0x277] = lastCheck[4] + // first error time low bits, high bit + copy(b[0x198:0x19c], errorFirstTime[0:4]) + b[0x278] = errorFirstTime[4] + // last error time low bits, high bit + copy(b[0x1cc:0x1d0], errorLastTime[0:4]) + b[0x279] = errorLastTime[4] + + // error codes + b[0x27a] = sb.errorFirstCode + b[0x27b] = sb.errorLastCode + + binary.LittleEndian.PutUint16(b[0x34:0x36], sb.mountCount) + binary.LittleEndian.PutUint16(b[0x36:0x38], sb.mountsToFsck) + + binary.LittleEndian.PutUint16(b[0x3a:0x3c], uint16(sb.filesystemState)) + binary.LittleEndian.PutUint16(b[0x3c:0x3e], uint16(sb.errorBehaviour)) + + binary.LittleEndian.PutUint16(b[0x3e:0x40], sb.minorRevision) + 
binary.LittleEndian.PutUint32(b[0x40:0x44], uint32(sb.lastCheck.Unix())) + binary.LittleEndian.PutUint32(b[0x44:0x48], sb.checkInterval) + + binary.LittleEndian.PutUint32(b[0x48:0x4c], uint32(sb.creatorOS)) + binary.LittleEndian.PutUint32(b[0x4c:0x50], sb.revisionLevel) + binary.LittleEndian.PutUint16(b[0x50:0x52], sb.reservedBlocksDefaultUID) + binary.LittleEndian.PutUint16(b[0x52:0x54], sb.reservedBlocksDefaultGID) + + binary.LittleEndian.PutUint32(b[0x54:0x58], sb.firstNonReservedInode) + binary.LittleEndian.PutUint16(b[0x58:0x5a], sb.inodeSize) + binary.LittleEndian.PutUint16(b[0x5a:0x5c], sb.blockGroup) + + if sb.uuid != nil { + copy(b[0x68:0x78], sb.uuid.Bytes()) + } + + ab, err := stringToASCIIBytes(sb.volumeLabel, 16) + if err != nil { + return nil, fmt.Errorf("error converting volume label to bytes: %v", err) + } + copy(b[0x78:0x88], ab[0:16]) + ab, err = stringToASCIIBytes(sb.lastMountedDirectory, 64) + if err != nil { + return nil, fmt.Errorf("error last mounted directory to bytes: %v", err) + } + copy(b[0x88:0xc8], ab[0:64]) + + binary.LittleEndian.PutUint32(b[0xc8:0xcc], sb.algorithmUsageBitmap) + + b[0xcc] = sb.preallocationBlocks + b[0xcd] = sb.preallocationDirectoryBlocks + binary.LittleEndian.PutUint16(b[0xce:0xd0], sb.reservedGDTBlocks) + + if sb.journalSuperblockUUID != nil { + copy(b[0xd0:0xe0], sb.journalSuperblockUUID.Bytes()) + } + + binary.LittleEndian.PutUint32(b[0xe0:0xe4], sb.journalInode) + binary.LittleEndian.PutUint32(b[0xe4:0xe8], sb.journalDeviceNumber) + binary.LittleEndian.PutUint32(b[0xe8:0xec], sb.orphanedInodesStart) + + // to be safe + if len(sb.hashTreeSeed) < 4 { + sb.hashTreeSeed = append(sb.hashTreeSeed, 0, 0, 0, 0) + } + binary.LittleEndian.PutUint32(b[0xec:0xf0], sb.hashTreeSeed[0]) + binary.LittleEndian.PutUint32(b[0xf0:0xf4], sb.hashTreeSeed[1]) + binary.LittleEndian.PutUint32(b[0xf4:0xf8], sb.hashTreeSeed[2]) + binary.LittleEndian.PutUint32(b[0xf8:0xfc], sb.hashTreeSeed[3]) + + b[0xfc] = byte(sb.hashVersion) + + 
binary.LittleEndian.PutUint16(b[0xfe:0x100], sb.groupDescriptorSize) + + binary.LittleEndian.PutUint32(b[0x100:0x104], sb.defaultMountOptions.toInt()) + binary.LittleEndian.PutUint32(b[0x104:0x108], sb.firstMetablockGroup) + + if sb.journalBackup != nil { + b[0xfd] = 1 + startJournalBackup := 0x10c + for i := 0; i < 15; i++ { + start := startJournalBackup + 4*i + end := startJournalBackup + 4*i + 4 + binary.LittleEndian.PutUint32(b[start:end], sb.journalBackup.iBlocks[i]) + } + + iSizeBytes := make([]byte, 8) + binary.LittleEndian.PutUint64(iSizeBytes, sb.journalBackup.iSize) + copy(b[startJournalBackup+4*16:startJournalBackup+4*17], iSizeBytes[0:4]) + copy(b[startJournalBackup+4*15:startJournalBackup+4*16], iSizeBytes[4:8]) + } + + binary.LittleEndian.PutUint16(b[0x15c:0x15e], sb.inodeMinBytes) + binary.LittleEndian.PutUint16(b[0x15e:0x160], sb.inodeReserveBytes) + binary.LittleEndian.PutUint32(b[0x160:0x164], sb.miscFlags.toInt()) + + binary.LittleEndian.PutUint16(b[0x164:0x166], sb.raidStride) + binary.LittleEndian.PutUint32(b[0x170:0x174], sb.raidStripeWidth) + + binary.LittleEndian.PutUint16(b[0x166:0x168], sb.multiMountPreventionInterval) + binary.LittleEndian.PutUint64(b[0x168:0x170], sb.multiMountProtectionBlock) + + b[0x174] = uint8(math.Log2(float64(sb.logGroupsPerFlex))) + + b[0x175] = sb.checksumType // only valid one is 1 + + // b[0x176:0x178] are reserved padding + + binary.LittleEndian.PutUint64(b[0x178:0x180], sb.totalKBWritten) + + binary.LittleEndian.PutUint32(b[0x180:0x184], sb.snapshotInodeNumber) + binary.LittleEndian.PutUint32(b[0x184:0x188], sb.snapshotID) + binary.LittleEndian.PutUint64(b[0x188:0x190], sb.snapshotReservedBlocks) + binary.LittleEndian.PutUint32(b[0x190:0x194], sb.snapshotStartInode) + + // errors + binary.LittleEndian.PutUint32(b[0x194:0x198], sb.errorCount) + binary.LittleEndian.PutUint32(b[0x19c:0x1a0], sb.errorFirstInode) + binary.LittleEndian.PutUint64(b[0x1a0:0x1a8], sb.errorFirstBlock) + errorFirstFunctionBytes, err := 
stringToASCIIBytes(sb.errorFirstFunction, 32) + if err != nil { + return nil, fmt.Errorf("error converting errorFirstFunction to bytes: %v", err) + } + copy(b[0x1a8:0x1c8], errorFirstFunctionBytes) + binary.LittleEndian.PutUint32(b[0x1c8:0x1cc], sb.errorFirstLine) + binary.LittleEndian.PutUint32(b[0x1d0:0x1d4], sb.errorLastInode) + binary.LittleEndian.PutUint32(b[0x1d4:0x1d8], sb.errorLastLine) + binary.LittleEndian.PutUint64(b[0x1d8:0x1e0], sb.errorLastBlock) + errorLastFunctionBytes, err := stringToASCIIBytes(sb.errorLastFunction, 32) + if err != nil { + return nil, fmt.Errorf("error converting errorLastFunction to bytes: %v", err) + } + copy(b[0x1e0:0x200], errorLastFunctionBytes) + + mountOptionsBytes, err := stringToASCIIBytes(sb.mountOptions, 64) + if err != nil { + return nil, fmt.Errorf("error converting mountOptions to bytes: %v", err) + } + copy(b[0x200:0x240], mountOptionsBytes) + binary.LittleEndian.PutUint32(b[0x240:0x244], sb.userQuotaInode) + binary.LittleEndian.PutUint32(b[0x244:0x248], sb.groupQuotaInode) + // overheadBlocks *always* is 0 + binary.LittleEndian.PutUint32(b[0x248:0x24c], sb.overheadBlocks) + binary.LittleEndian.PutUint32(b[0x24c:0x250], sb.backupSuperblockBlockGroups[0]) + binary.LittleEndian.PutUint32(b[0x250:0x254], sb.backupSuperblockBlockGroups[1]) + // safety check of encryption algorithms + + for i := 0; i < 4; i++ { + b[0x254+i] = byte(sb.encryptionAlgorithms[i]) + } + for i := 0; i < 16; i++ { + b[0x258+i] = sb.encryptionSalt[i] + } + binary.LittleEndian.PutUint32(b[0x268:0x26c], sb.lostFoundInode) + binary.LittleEndian.PutUint32(b[0x26c:0x270], sb.projectQuotaInode) + + binary.LittleEndian.PutUint32(b[0x270:0x274], sb.checksumSeed) + + binary.LittleEndian.PutUint16(b[0x27c:0x27e], sb.filenameCharsetEncoding) + binary.LittleEndian.PutUint16(b[0x27e:0x280], sb.filenameCharsetEncodingFlags) + binary.LittleEndian.PutUint32(b[0x280:0x284], sb.orphanedInodeInodeNumber) + + // b[0x288:0x3fc] are reserved for zero padding + + // 
calculate the checksum and validate - we use crc32c + if sb.features.metadataChecksums { + actualChecksum := crc.CRC32c(0xffffffff, b[0:0x3fc]) + binary.LittleEndian.PutUint32(b[0x3fc:0x400], actualChecksum) + } + + return b, nil +} + +func (sb *superblock) gdtChecksumType() gdtChecksumType { + var gdtChecksumTypeInFS gdtChecksumType + switch { + case sb.features.metadataChecksums: + gdtChecksumTypeInFS = gdtChecksumMetadata + case sb.features.gdtChecksum: + gdtChecksumTypeInFS = gdtChecksumGdt + default: + gdtChecksumTypeInFS = gdtChecksumNone + } + return gdtChecksumTypeInFS +} + +func (sb *superblock) blockGroupCount() uint64 { + return sb.blockCount / uint64(sb.blocksPerGroup) +} + +// calculateBackupSuperblocks calculate which block groups should have backup superblocks. +func calculateBackupSuperblockGroups(bgs int64) []int64 { + // calculate which block groups should have backup superblocks + // these are if the block group number is a power of 3, 5, or 7 + var backupGroups []int64 + for i := float64(0); ; i++ { + bg := int64(math.Pow(3, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + for i := float64(0); ; i++ { + bg := int64(math.Pow(5, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + for i := float64(0); ; i++ { + bg := int64(math.Pow(7, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + // sort the backup groups + uniqBackupGroups := util.Uniqify[int64](backupGroups) + sort.Slice(uniqBackupGroups, func(i, j int) bool { + return uniqBackupGroups[i] < uniqBackupGroups[j] + }) + return uniqBackupGroups +} + +func bytesToTime(b ...[]byte) time.Time { + // ensure it is at least 8 bytes + var ( + in [8]byte + count int + ) + for _, v := range b { + toCopy := len(v) + if toCopy+count > len(in) { + toCopy = len(in) - count + } + copied := copy(in[count:], v[:toCopy]) + count += copied + } + return time.Unix(int64(binary.LittleEndian.Uint64(in[:])), 0).UTC() +} + +// 
timeToBytes convert a time.Time to an 8 byte slice. Guarantees 8 bytes +func timeToBytes(t time.Time) []byte { + timestamp := t.Unix() + var b = make([]byte, 8) + binary.LittleEndian.PutUint64(b, uint64(timestamp)) + return b +} diff --git a/filesystem/ext4/superblock_test.go b/filesystem/ext4/superblock_test.go new file mode 100644 index 00000000..0265988d --- /dev/null +++ b/filesystem/ext4/superblock_test.go @@ -0,0 +1,65 @@ +package ext4 + +import ( + "reflect" + "testing" + + "github.com/go-test/deep" +) + +func TestSuperblockFromBytes(t *testing.T) { + expected, _, b, _, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Failed to create valid superblock: %v", err) + } + sb, err := superblockFromBytes(b) + if err != nil { + t.Fatalf("Failed to parse superblock bytes: %v", err) + } + + deep.CompareUnexportedFields = true + if diff := deep.Equal(*expected, *sb); diff != nil { + t.Errorf("superblockFromBytes() = %v", diff) + } +} + +func TestSuperblockToBytes(t *testing.T) { + sb, _, expected, _, err := testGetValidSuperblockAndGDTs() + if err != nil { + t.Fatalf("Failed to create valid superblock: %v", err) + } + b, err := sb.toBytes() + if err != nil { + t.Fatalf("Failed to serialize superblock: %v", err) + } + diff, diffString := dumpByteSlicesWithDiffs(b, expected, 32, false, true, true) + if diff { + t.Errorf("superblock.toBytes() mismatched, actual then expected\n%s", diffString) + } +} + +func TestCalculateBackupSuperblocks(t *testing.T) { + tests := []struct { + bgs int64 + expected []int64 + }{ + // Test case 1: Single block group + {bgs: 2, expected: []int64{1}}, + + // Test case 2: Multiple block groups + {bgs: 119, expected: []int64{1, 3, 5, 7, 9, 25, 27, 49, 81}}, + + // Test case 3: Large number of block groups + {bgs: 746, expected: []int64{1, 3, 5, 7, 9, 25, 27, 49, 81, 125, 243, 343, 625, 729}}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + result := calculateBackupSuperblockGroups(tt.bgs) + if 
!reflect.DeepEqual(result, tt.expected) { + t.Errorf("calculateBackupSuperblockGroups(%d) = %v; want %v", + tt.bgs, result, tt.expected) + } + }) + } +} diff --git a/filesystem/ext4/testdata/.gitignore b/filesystem/ext4/testdata/.gitignore new file mode 100644 index 00000000..849ddff3 --- /dev/null +++ b/filesystem/ext4/testdata/.gitignore @@ -0,0 +1 @@ +dist/ diff --git a/filesystem/ext4/testdata/README.md b/filesystem/ext4/testdata/README.md new file mode 100644 index 00000000..df6e7232 --- /dev/null +++ b/filesystem/ext4/testdata/README.md @@ -0,0 +1,31 @@ +# ext4 Test Fixtures + +This directory contains test fixtures for ext4 filesystems. Specifically, it contains the following files: + +* [buildimg.sh](buildimg.sh): A script to generate the `ext4.img` file and any other files needed for tests +* [README.md](README.md): This file +* [dist](dist): A directory containing the various created artifacts. These are under `.gitignore` and should not be committed to git. + +Because of the size of the ext4 filesystem image `ext4.img`, it is excluded from git. Since all of the +artifacts are generated from it, there is not much point in committing those to git, so those are +ignored as well. + +The artifacts need to be generated anew for each +installation on which you want to test. Of course, each generation can give slightly different +inode information, and certainly will give different timestamps, so you need to update the tests +appropriately; see below. + +To generate the artifacts, including creating the `dist/` directory, run `./buildimg.sh` from within this directory. 
+ +This makes: + +* an ext4 filesystem in an image file `ext4.img`, which contains: + * the `/foo` directory with sufficient entries to require using hash tree directories + * some small and large files in the root +* extracted blocks of the file, such as `superblock.bin` and `gdt.bin` +* the root directory in `root_directory.bin` +* information about the root directory, extracting using `debugfs` from the `ext4.img`, in `root_dir.txt` +* information about the `/foo` directory, extracting using `debugfs` from the `ext4.img`, in `foo_dir.txt` +* information about the superblock and block group table, extracted using `debugfs` from the `ext4.img`, in `stats.txt` + +You **must** create artifacts before running the tests. diff --git a/filesystem/ext4/testdata/buildimg.sh b/filesystem/ext4/testdata/buildimg.sh new file mode 100755 index 00000000..f79d2313 --- /dev/null +++ b/filesystem/ext4/testdata/buildimg.sh @@ -0,0 +1,44 @@ +#!/bin/sh +set -e +mkdir -p dist +cat << "EOF" | docker run -i --rm -v $PWD/dist:/data -w /data --privileged alpine:3.20 +set -e +set -x +apk --update add e2fsprogs e2fsprogs-extra +dd if=/dev/zero of=ext4.img bs=1M count=100 +mkfs.ext4 ext4.img +mount ext4.img /mnt +cd /mnt +mkdir foo +mkdir foo/bar +echo "This is a short file" > shortfile.txt +dd if=/dev/zero of=two-k-file.dat bs=1024 count=2 +dd if=/dev/zero of=six-k-file.dat bs=1024 count=6 +dd if=/dev/zero of=seven-k-file.dat bs=1024 count=7 +dd if=/dev/zero of=ten-meg-file.dat bs=1M count=10 +set +x +i=0; until [ $i -gt 10000 ]; do mkdir foo/dir${i}; i=$(( $i+1 )); done +set -x +# create a file with known content +dd if=/dev/random of=/data/random.dat bs=1024 count=20 +cp /data/random.dat random.dat +# symlink to a file and to a dead-end +ln -s random.dat symlink.dat +ln -s /random.dat absolutesymlink +ln -s nonexistent deadlink +ln -s /some/really/long/path/that/does/not/exist/and/does/not/fit/in/symlink deadlonglink # the target here is >60 chars and so will not fit within the inode +# 
	// PB represents one PB
// splitPath splits a filesystem path into its non-empty components.
// Leading, trailing and repeated "/" separators are discarded, so
// "/a//b/" yields ["a", "b"] and "/" yields an empty slice.
func splitPath(p string) []string {
	parts := strings.Split(p, "/")
	ret := make([]string, 0, len(parts))
	for _, sub := range parts {
		if sub != "" {
			ret = append(ret, sub)
		}
	}
	return ret
}

// stringToASCIIBytes converts a string to a byte slice of exactly size bytes,
// provided every character fits in a single byte (rune value <= 255).
// Shorter strings are padded with NUL bytes to size; longer strings are
// truncated to size. Returns an error if any character does not fit in a byte.
//
// Bug fixed: the previous implementation indexed []rune(s) using len(s) (the
// BYTE length), so any multi-byte input (e.g. "héllo") panicked with an
// index-out-of-range instead of being handled.
func stringToASCIIBytes(s string, size int) ([]byte, error) {
	runes := []rune(s)
	b := make([]byte, len(runes))
	for i, r := range runes {
		// anything above 255 cannot be represented in a single byte
		if r > 255 {
			return nil, fmt.Errorf("non-ASCII character in name: %s", s)
		}
		b[i] = byte(r)
	}
	switch {
	case len(b) < size:
		// pad with NUL bytes up to the requested size
		b = append(b, make([]byte, size-len(b))...)
	case len(b) > size:
		// truncate to the requested size
		b = b[:size]
	}
	return b, nil
}

// minString converts b to a string, dropping any trailing NUL (0x0) bytes.
func minString(b []byte) string {
	end := len(b)
	for end > 0 && b[end-1] == 0 {
		end--
	}
	return string(b[:end])
}
+ {"", 16, []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, nil}, + + // Test case 2: Short string + {"EXT4", 5, []byte{'E', 'X', 'T', '4', 0}, nil}, + + // Test case 3: Long string + {"EXT4 filesystem", 8, []byte{'E', 'X', 'T', '4', ' ', 'f', 'i', 'l'}, nil}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + result, err := stringToASCIIBytes(tt.s, tt.size) + if err != tt.err { + t.Fatalf("stringToASCIIBytes(%q, %d) error = %v; want %v", tt.s, tt.size, err, tt.err) + } + if !bytes.Equal(result, tt.expected) { + t.Errorf("stringToASCIIBytes(%q, %d) = %v; want %v", tt.s, tt.size, result, tt.expected) + } + }) + } +} + +func TestMinString(t *testing.T) { + tests := []struct { + b []byte + expected string + }{ + // Test case 1: Empty byte slice + {[]byte{}, ""}, + + // Test case 2: Short byte slice + {[]byte{'E', 'X', 'T', '4', 0}, "EXT4"}, + + // Test case 3: Long byte slice + {[]byte{'E', 'X', 'T', '4', ' ', 'f', 'i', 'l'}, "EXT4 fil"}, + + {[]byte{'E', 'X', 'T', '4', ' ', 'f', 'i', 'l', 0}, "EXT4 fil"}, + + {[]byte{'E', 'X', 'T', '4', ' ', 'f', 'i', 'l', 0, 0, 0, 0, 0}, "EXT4 fil"}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + result := minString(tt.b) + if result != tt.expected { + t.Errorf("minString(%v) = %q; want %q", tt.b, result, tt.expected) + } + }) + } +} + +// dumpByteSlice dump a byte slice in hex and optionally ASCII format. +// Optionally but position at the beginning of each row, like xxd. +// Optionally convert to ASCII at end of each row, like xxd. +// Can show positions at beginning of each row in hex, decimal or both. +// Can filter out all rows except those containing given positions in showOnlyBytes. If showOnlyBytes is nil, all rows are shown. +// If showOnlyBytes is not nil, even an empty slice, will only show those rows that contain the given positions. 
// dumpByteSlice renders b as an xxd-style hex dump and returns it as a string.
// Each row shows bytesPerRow bytes.
//   - showPosHex / showPosDec: prefix each row with its starting offset in hex
//     and/or decimal.
//   - showASCII: append a printable-ASCII rendering of the row, like xxd.
//   - showOnlyBytes: when non-nil (even if empty), only rows containing at
//     least one of the listed positions are emitted, and those bytes are
//     highlighted in bold red ANSI; a nil value emits every row unhighlighted.
func dumpByteSlice(b []byte, bytesPerRow int, showASCII, showPosHex, showPosDec bool, showOnlyBytes []int) (out string) {
	// positions that should be highlighted and used for row filtering
	highlight := make(map[int]bool, len(showOnlyBytes))
	for _, pos := range showOnlyBytes {
		highlight[pos] = true
	}
	filtering := showOnlyBytes != nil

	var printable []byte
	numRows := (len(b) + bytesPerRow - 1) / bytesPerRow
	for r := 0; r < numRows; r++ {
		start := r * bytesPerRow
		end := start + bytesPerRow
		printable = printable[:0]

		// optional row-position header (hex and/or decimal), then a separator
		line := ""
		if showPosHex {
			line += fmt.Sprintf("%08x ", start)
		}
		if showPosDec {
			line += fmt.Sprintf("%4d ", start)
		}
		line += ": "

		for j := start; j < end; j++ {
			// extra gap every 8 bytes for readability
			if j%8 == 0 {
				line += " "
			}
			if j < len(b) {
				cell := fmt.Sprintf(" %02x", b[j])
				if filtering && highlight[j] {
					// wrap the cell in ANSI bold red
					cell = "\033[1m\033[31m" + cell + "\033[0m"
				}
				line += cell
			} else {
				// past the end of the slice: pad to the width of one hex cell
				line += "   "
			}
			switch {
			case j >= len(b):
				printable = append(printable, ' ')
			case b[j] < 32 || b[j] > 126:
				// unprintable byte renders as a dot
				printable = append(printable, '.')
			default:
				printable = append(printable, b[j])
			}
		}
		if showASCII {
			line += fmt.Sprintf(" %s", string(printable))
		}
		line += "\n"

		// when filtering, keep only rows that contain a requested position
		keep := true
		if filtering {
			keep = false
			for j := start; j < end; j++ {
				if highlight[j] {
					keep = true
					break
				}
			}
		}
		if keep {
			out += line
		}
	}
	return out
}

// diff records a single position at which two byte slices disagree.
type diff struct {
	Offset int
	ByteA  byte
	ByteB  byte
}

// compareByteSlices compares two byte slices position by position. If the byte
// slices are identical, diffs has length 0; otherwise it holds one entry per
// differing position (positions past the end of the shorter slice compare
// against a zero byte).
func compareByteSlices(a, b []byte) (diffs []diff) {
	longest := len(a)
	if len(b) > longest {
		longest = len(b)
	}
	for i := 0; i < longest; i++ {
		inA, inB := i < len(a), i < len(b)
		var va, vb byte
		if inA {
			va = a[i]
		}
		if inB {
			vb = b[i]
		}
		if !inA || !inB || va != vb {
			diffs = append(diffs, diff{Offset: i, ByteA: va, ByteB: vb})
		}
	}
	return diffs
}

// dumpByteSlicesWithDiffs shows two byte slices in hex (and optionally ASCII)
// format with their differences highlighted, returning whether they differ and
// the combined dump. Identical inputs yield (false, "").
//
//nolint:unparam // sure, bytesPerRow always is 32, but it could be something else
func dumpByteSlicesWithDiffs(a, b []byte, bytesPerRow int, showASCII, showPosHex, showPosDec bool) (different bool, out string) {
	deltas := compareByteSlices(a, b)
	if len(deltas) == 0 {
		return false, ""
	}

	positions := make([]int, 0, len(deltas))
	for _, d := range deltas {
		positions = append(positions, d.Offset)
	}
	out = dumpByteSlice(a, bytesPerRow, showASCII, showPosHex, showPosDec, positions) +
		"\n" +
		dumpByteSlice(b, bytesPerRow, showASCII, showPosHex, showPosDec, positions)
	return true, out
}
github.com/diskfs/go-diskfs -go 1.19 +go 1.21 require ( + github.com/bits-and-blooms/bitset v1.2.1 github.com/djherbis/times v1.6.0 github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab github.com/go-test/deep v1.0.8 github.com/google/uuid v1.3.0 github.com/pierrec/lz4/v4 v4.1.17 github.com/pkg/xattr v0.4.9 + github.com/satori/go.uuid v1.2.0 github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af github.com/ulikunitz/xz v0.5.11 + golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 golang.org/x/sys v0.5.0 ) diff --git a/go.sum b/go.sum index 52b0b56d..c3dc1a03 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/bits-and-blooms/bitset v1.2.1 h1:M+/hrU9xlMp7t4TyTDQW97d3tRPVuKFC6zBEK16QnXY= +github.com/bits-and-blooms/bitset v1.2.1/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -17,6 +19,8 @@ github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= +github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yfB+If0vjp97vuT74F72r8hfRpP8jLU0= github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -24,11 +28,14 @@ github.com/stretchr/testify v1.7.0 
// Uniqify returns a new slice containing the elements of s with duplicates
// removed, preserving the order of first appearance.
//
// Fix: the previous implementation collected the unique elements by ranging
// over a map, so the result order was randomized on every call (Go map
// iteration order is unspecified), making output nondeterministic. Walking the
// input once and recording seen values keeps the same element set while making
// the result stable and deterministic.
func Uniqify[T comparable](s []T) []T {
	seen := make(map[T]bool, len(s))
	result := make([]T, 0, len(s))
	for _, v := range s {
		if !seen[v] {
			seen[v] = true
			result = append(result, v)
		}
	}
	return result
}