// bitmap is an in-memory copy of one on-disk ext4 bitmap (block or inode).
// Bit i of byte i/8 tracks slot i; see checkFree for the polarity note.
type bitmap struct {
	bits []byte
}

// bitmapFromBytes creates a bitmap from raw on-disk bytes.
// The input is copied, so the caller may reuse b.
func bitmapFromBytes(b []byte) *bitmap {
	bits := make([]byte, len(b))
	copy(bits, b)
	return &bitmap{bits: bits}
}

// toBytes returns a fresh copy of the raw bytes ready to be written to disk.
func (bm *bitmap) toBytes() []byte {
	b := make([]byte, len(bm.bits))
	copy(b, bm.bits)
	return b
}

// checkFree reports whether the bit at location is SET, returning an error when
// location is outside the bitmap.
// NOTE(review): the name suggests "free" but this returns bit==1; in on-disk
// ext4 bitmaps 1 means in-use — confirm intended polarity with callers.
func (bm *bitmap) checkFree(location int) (bool, error) {
	byteNumber, bitNumber := findBitForIndex(location)
	// was `>`: byteNumber == len(bm.bits) slipped through and panicked below
	if location < 0 || byteNumber >= len(bm.bits) {
		return false, fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	mask := byte(0x1) << bitNumber
	return bm.bits[byteNumber]&mask == mask, nil
}

// free clears the bit at location (marks it unused).
func (bm *bitmap) free(location int) error {
	byteNumber, bitNumber := findBitForIndex(location)
	// was `>`: off-by-one allowed an out-of-range index panic
	if location < 0 || byteNumber >= len(bm.bits) {
		return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	bm.bits[byteNumber] &^= byte(0x1) << bitNumber
	return nil
}

// use sets the bit at location (marks it used).
func (bm *bitmap) use(location int) error {
	byteNumber, bitNumber := findBitForIndex(location)
	// was `>`: off-by-one allowed an out-of-range index panic
	if location < 0 || byteNumber >= len(bm.bits) {
		return fmt.Errorf("location %d is not in %d size bitmap", location, len(bm.bits)*8)
	}
	bm.bits[byteNumber] |= byte(0x1) << bitNumber
	return nil
}

// findFirstFree returns the index of the first 0 bit, or -1 if every bit is set.
func (bm *bitmap) findFirstFree() int {
	for i, b := range bm.bits {
		// all bits in this byte used; move on
		if b == 0xff {
			continue
		}
		for j := uint8(0); j < 8; j++ {
			mask := byte(0x1) << j
			if b&mask != mask {
				// was `8*i + (8 - int(j))`, which returned 8 for bit 0
				// and never 0; bit j of byte i is index 8*i+j
				return 8*i + int(j)
			}
		}
	}
	return -1
}

// findFirstUsed returns the index of the first 1 bit, or -1 if every bit is clear.
//
//nolint:unused // will be used in the future, not yet
func (bm *bitmap) findFirstUsed() int {
	for i, b := range bm.bits {
		// all bits in this byte free; move on
		if b == 0x00 {
			continue
		}
		for j := uint8(0); j < 8; j++ {
			mask := byte(0x1) << j
			if b&mask == mask {
				// same index fix as findFirstFree: bit j of byte i is 8*i+j
				return 8*i + int(j)
			}
		}
	}
	return -1
}

// findBitForIndex maps a bit index to its (byte offset, bit-within-byte) pair.
func findBitForIndex(index int) (byteNumber int, bitNumber uint8) {
	return index / 8, uint8(index % 8)
}

// blockGroup is a structure holding the data about a single block group
//
//nolint:unused // will be used in the future, not yet
type blockGroup struct {
	inodeBitmap    *bitmap
	blockBitmap    *bitmap
	blockSize      int
	number         int
	inodeTableSize int
	firstDataBlock int
}
create a blockGroup struct from bytes +// it does not load the inode table or data blocks into memory, rather holding pointers to where they are +// +//nolint:unused // will be used in the future, not yet +func blockGroupFromBytes(b []byte, blockSize, groupNumber int) (*blockGroup, error) { + expectedSize := 2 * blockSize + actualSize := len(b) + if actualSize != expectedSize { + return nil, fmt.Errorf("expected to be passed %d bytes for 2 blocks of size %d, instead received %d", expectedSize, blockSize, actualSize) + } + inodeBitmap := bitmapFromBytes(b[0:blockSize]) + blockBitmap := bitmapFromBytes(b[blockSize : 2*blockSize]) + + bg := blockGroup{ + inodeBitmap: inodeBitmap, + blockBitmap: blockBitmap, + number: groupNumber, + blockSize: blockSize, + } + return &bg, nil +} + +// toBytes returns bitmaps ready to be written to disk +// +//nolint:unused // will be used in the future, not yet +func (bg *blockGroup) toBytes() ([]byte, error) { + b := make([]byte, 2*bg.blockSize) + inodeBitmapBytes := bg.inodeBitmap.toBytes() + blockBitmapBytes := bg.blockBitmap.toBytes() + + b = append(b, inodeBitmapBytes...) + b = append(b, blockBitmapBytes...) + + return b, nil +} diff --git a/filesystem/ext4/directory.go b/filesystem/ext4/directory.go new file mode 100644 index 00000000..ce6e2655 --- /dev/null +++ b/filesystem/ext4/directory.go @@ -0,0 +1,32 @@ +package ext4 + +// Directory represents a single directory in an ext4 filesystem +type Directory struct { + directoryEntry + root bool + entries []*directoryEntry +} + +// dirEntriesFromBytes loads the directory entries from the raw bytes +func (d *Directory) entriesFromBytes(b []byte) error { + entries, err := parseDirEntries(b) + if err != nil { + return err + } + d.entries = entries + return nil +} + +// toBytes convert our entries to raw bytes +func (d *Directory) toBytes(bytesPerBlock int) []byte { + b := make([]byte, 0) + for _, de := range d.entries { + b2 := de.toBytes() + b = append(b, b2...) 
+ } + remainder := len(b) % bytesPerBlock + extra := bytesPerBlock - remainder + zeroes := make([]byte, extra) + b = append(b, zeroes...) + return b +} diff --git a/filesystem/ext4/directoryentry.go b/filesystem/ext4/directoryentry.go new file mode 100644 index 00000000..c3fa067d --- /dev/null +++ b/filesystem/ext4/directoryentry.go @@ -0,0 +1,73 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" +) + +const ( + minDirEntryLength int = 12 // actually 9 for 1-byte file length, but must be multiple of 4 bytes + maxDirEntryLength int = 263 +) + +// directoryEntry is a single directory entry +type directoryEntry struct { + inode uint32 + filename string + fileType fileType +} + +func directoryEntryFromBytes(b []byte) (*directoryEntry, error) { + if len(b) < minDirEntryLength { + return nil, fmt.Errorf("directory entry of length %d is less than minimum %d", len(b), minDirEntryLength) + } + if len(b) > maxDirEntryLength { + return nil, fmt.Errorf("directory entry of length %d is greater than maximum %d", len(b), maxDirEntryLength) + } + + //nolint:gocritic // keep this here for future reference + // length := binary.LittleEndian.Uint16(b[0x4:0x6]) + nameLength := b[0x6] + name := b[0x8 : 0x8+nameLength] + de := directoryEntry{ + inode: binary.LittleEndian.Uint32(b[0x0:0x4]), + fileType: fileType(b[0x7]), + filename: string(name), + } + return &de, nil +} + +func (de *directoryEntry) toBytes() []byte { + // it must be the header length + filename length rounded up to nearest multiple of 4 + nameLength := uint8(len(de.filename)) + entryLength := uint16(nameLength) + 8 + if leftover := entryLength % 4; leftover > 0 { + entryLength += leftover + } + b := make([]byte, 0, entryLength) + + binary.LittleEndian.PutUint32(b[0x0:0x4], de.inode) + binary.LittleEndian.PutUint16(b[0x4:0x6], entryLength) + b[0x6] = nameLength + b[0x7] = byte(de.fileType) + copy(b[0x8:], de.filename) + + return b +} + +// parse the data blocks to get the directory entries +func parseDirEntries(b 
[]byte) ([]*directoryEntry, error) { + entries := make([]*directoryEntry, 4) + count := 0 + for i := 0; i < len(b); count++ { + // read the length of the first entry + length := binary.LittleEndian.Uint16(b[i+0x4 : i+0x6]) + de, err := directoryEntryFromBytes(b[i : i+int(length)]) + if err != nil { + return nil, fmt.Errorf("failed to parse directory entry %d: %v", count, err) + } + entries = append(entries, de) + i += int(length) + } + return entries, nil +} diff --git a/filesystem/ext4/dirhash.go b/filesystem/ext4/dirhash.go new file mode 100644 index 00000000..8717c341 --- /dev/null +++ b/filesystem/ext4/dirhash.go @@ -0,0 +1,157 @@ +package ext4 + +import ( + "github.com/diskfs/go-diskfs/filesystem/ext4/md4" +) + +const ( + teaDelta uint32 = 0x9E3779B9 + k1 uint32 = 0 + k2 uint32 = 0o13240474631 + k3 uint32 = 0o15666365641 + ext4HtreeEOF32 uint32 = ((1 << (32 - 1)) - 1) + ext4HtreeEOF64 uint64 = ((1 << (64 - 1)) - 1) +) + +type hashVersion uint8 + +const ( + HashVersionLegacy = 0 + HashVersionHalfMD4 = 1 + HashVersionTEA = 2 + HashVersionLegacyUnsigned = 3 + HashVersionHalfMD4Unsigned = 4 + HashVersionTEAUnsigned = 5 + HashVersionSIP = 6 +) + +func TEATransform(buf [4]uint32, in []uint32) [4]uint32 { + var sum uint32 + var b0, b1 = buf[0], buf[1] + var a, b, c, d = in[0], in[1], in[2], in[3] + var n = 16 + + for ; n > 0; n-- { + sum += teaDelta + b0 += ((b1 << 4) + a) ^ (b1 + sum) ^ ((b1 >> 5) + b) + b1 += ((b0 << 4) + c) ^ (b0 + sum) ^ ((b0 >> 5) + d) + } + + buf[0] += b0 + buf[1] += b1 + return buf +} + +// the old legacy hash +// +//nolint:unparam,revive // we do not used signed, but we probably should, so leaving until we are sure +func dxHackHash(name string, signed bool) uint32 { + var hash uint32 + var hash0, hash1 uint32 = 0x12a3fe2d, 0x37abe8f9 + b := []byte(name) + + for i := len(b); i > 0; i-- { + // get the specific character + c := int(b[i-1]) + // the value of the individual character depends on if it is signed or not + hash = hash1 + (hash0 ^ 
uint32(c*7152373)) + + if hash&0x80000000 != 0 { + hash -= 0x7fffffff + } + hash1 = hash0 + hash0 = hash + } + return hash0 << 1 +} + +//nolint:unparam,revive // we do not used signed, but we probably should, so leaving until we are sure +func str2hashbuf(msg string, num int, signed bool) []uint32 { + var buf [8]uint32 + var pad, val uint32 + b := []byte(msg) + size := len(b) + + pad = uint32(size) | (uint32(size) << 8) + pad |= pad << 16 + + val = pad + if size > num*4 { + size = num * 4 + } + var j int + for i := 0; i < size; i++ { + c := int(b[i]) + val = uint32(c) + (val << 8) + if (i % 4) == 3 { + buf[j] = val + val = pad + num-- + j++ + } + } + num-- + if num >= 0 { + buf[j] = val + j++ + } + for num--; num >= 0; num-- { + buf[j] = pad + j++ + } + return buf[:] +} + +func ext4fsDirhash(name string, version hashVersion, seed []uint32) (hash, minorHash uint32) { + /* Initialize the default seed for the hash checksum functions */ + var buf = [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476} + + // Check to see if the seed is all zero, and if so, use the default + for i, val := range seed { + if val != 0 { + buf[i] = val + } + } + + switch version { + case HashVersionLegacyUnsigned: + hash = dxHackHash(name, false) + case HashVersionLegacy: + hash = dxHackHash(name, true) + case HashVersionHalfMD4Unsigned: + for i := 0; i < len(name); i += 32 { + in := str2hashbuf(name[i:], 8, false) + buf[1] = md4.HalfMD4Transform(buf, in) + } + minorHash = buf[2] + hash = buf[1] + case HashVersionHalfMD4: + for i := 0; i < len(name); i += 32 { + in := str2hashbuf(name[i:], 8, true) + buf[1] = md4.HalfMD4Transform(buf, in) + } + minorHash = buf[2] + hash = buf[1] + case HashVersionTEAUnsigned: + for i := 0; i < len(name); i += 16 { + in := str2hashbuf(name[i:], 4, false) + buf = TEATransform(buf, in) + } + hash = buf[0] + minorHash = buf[1] + case HashVersionTEA: + for i := 0; i < len(name); i += 16 { + in := str2hashbuf(name[i:], 4, true) + buf = TEATransform(buf, in) + 
} + hash = buf[0] + minorHash = buf[1] + default: + return 0, 0 + } + hash &= ^uint32(1) + if hash == (ext4HtreeEOF32 << 1) { + hash = (ext4HtreeEOF32 - 1) << 1 + } + return hash, minorHash +} diff --git a/filesystem/ext4/dirhash_test.go b/filesystem/ext4/dirhash_test.go new file mode 100644 index 00000000..7299535a --- /dev/null +++ b/filesystem/ext4/dirhash_test.go @@ -0,0 +1 @@ +package ext4 diff --git a/filesystem/ext4/ext4.go b/filesystem/ext4/ext4.go new file mode 100644 index 00000000..f9ceaeba --- /dev/null +++ b/filesystem/ext4/ext4.go @@ -0,0 +1,1061 @@ +package ext4 + +import ( + "encoding/binary" + "errors" + "fmt" + "hash/crc32" + "math" + "os" + "path" + "strings" + "time" + + "github.com/diskfs/go-diskfs/filesystem" + "github.com/diskfs/go-diskfs/util" + uuid "github.com/satori/go.uuid" +) + +// SectorSize indicates what the sector size in bytes is +type SectorSize uint16 + +// BlockSize indicates how many sectors are in a block +type BlockSize uint8 + +// BlockGroupSize indicates how many blocks are in a group, standardly 8*block_size_in_bytes + +const ( + // SectorSize512 is a sector size of 512 bytes, used as the logical size for all ext4 filesystems + SectorSize512 SectorSize = 512 + minBlocksPerGroup uint32 = 256 + BootSectorSize SectorSize = 2 * SectorSize512 + SuperblockSize SectorSize = 2 * SectorSize512 + BlockGroupFactor int = 8 + DefaultInodeRatio int64 = 8192 + DefaultInodeSize int64 = 256 + DefaultReservedBlocksPercent uint8 = 5 + DefaultVolumeName = "diskfs_ext4" + minClusterSize int = 128 + maxClusterSize int = 65529 + bytesPerSlot int = 32 + maxCharsLongFilename int = 13 + maxBlocksPerExtent int = 32768 + million int = 1000000 + billion int = 1000 * million + firstNonReservedInode uint32 = 11 // traditional + + minBlockLogSize int = 10 /* 1024 */ + maxBlockLogSize int = 16 /* 65536 */ + minBlockSize int = (1 << minBlockLogSize) + maxBlockSize int = (1 << maxBlockLogSize) + + max32Num uint64 = math.MaxUint32 + max64Num uint64 = 
math.MaxUint64 + + maxFilesystemSize32Bit uint64 = 16*2 ^ 40 + maxFilesystemSize64Bit uint64 = 1*2 ^ 60 + + checksumType uint8 = 1 + + // default for log groups per flex group + defaultLogGroupsPerFlex int = 3 + + // fixed inodes + rootInode uint32 = 2 + userQuotaInode uint32 = 3 + groupQuotaInode uint32 = 4 + journalInode uint32 = 8 + lostFoundInode = 11 // traditional +) + +type Params struct { + UUID *uuid.UUID + SectorsPerBlock uint8 + BlocksPerGroup uint32 + InodeRatio int64 + InodeCount uint32 + SparseSuperVersion uint8 + Checksum bool + ClusterSize int64 + ReservedBlocksPercent uint8 + VolumeName string + // JournalDevice external journal device, only checked if WithFeatureSeparateJournalDevice(true) is set + JournalDevice string + LogFlexBlockGroups int + Features []FeatureOpt + DefaultMountOpts []MountOpt +} + +// FileSystem implememnts the FileSystem interface +type FileSystem struct { + bootSector []byte + superblock *superblock + groupDescriptors *groupDescriptors + dataBlockBitmap bitmap + inodeBitmap bitmap + blockGroups int64 + size int64 + start int64 + file util.File +} + +// Equal compare if two filesystems are equal +func (fs *FileSystem) Equal(a *FileSystem) bool { + localMatch := fs.file == a.file + sbMatch := fs.superblock.equal(a.superblock) + gdMatch := fs.groupDescriptors.equal(a.groupDescriptors) + return localMatch && sbMatch && gdMatch +} + +// Create creates an ext4 filesystem in a given file or device +// +// requires the util.File where to create the filesystem, size is the size of the filesystem in bytes, +// start is how far in bytes from the beginning of the util.File to create the filesystem, +// and blocksize is is the logical blocksize to use for creating the filesystem +// +// note that you are *not* required to create the filesystem on the entire disk. You could have a disk of size +// 20GB, and create a small filesystem of size 50MB that begins 2GB into the disk. 
+// This is extremely useful for creating filesystems on disk partitions. +// +// Note, however, that it is much easier to do this using the higher-level APIs at github.com/diskfs/go-diskfs +// which allow you to work directly with partitions, rather than having to calculate (and hopefully not make any errors) +// where a partition starts and ends. +// +// If the provided blocksize is 0, it will use the default of 512 bytes. If it is any number other than 0 +// or 512, it will return an error. +// +//nolint:gocyclo // yes, this has high cyclomatic complexity, but we can accept it +func Create(f util.File, size, start, sectorsize int64, p *Params) (*FileSystem, error) { + // be safe about the params pointer + if p == nil { + p = &Params{} + } + + // sectorsize must be <=0 or exactly SectorSize512 or error + // because of this, we know we can scale it down to a uint32, since it only can be 512 bytes + if sectorsize != int64(SectorSize512) && sectorsize > 0 { + return nil, fmt.Errorf("sectorsize for ext4 must be either 512 bytes or 0, not %d", sectorsize) + } + var sectorsize32 = uint32(sectorsize) + // there almost are no limits on an ext4 fs - theoretically up to 1 YB + // but we do have to check the max and min size per the requested parameters + // if size < minSizeGivenParameters { + // return nil, fmt.Errorf("requested size is smaller than minimum allowed ext4 size %d for given parameters", minSizeGivenParameters*4) + // } + // if size > maxSizeGivenParameters { + // return nil, fmt.Errorf("requested size is bigger than maximum ext4 size %d for given parameters", maxSizeGivenParameters*4) + // } + + // uuid + fsuuid := p.UUID + if fsuuid == nil { + fsuuid2 := uuid.NewV4() + fsuuid = &fsuuid2 + } + + // blocksize + sectorsPerBlock := p.SectorsPerBlock + userProvidedBlocksize := false + switch { + case sectorsPerBlock > 128 || sectorsPerBlock < 2: + return nil, fmt.Errorf("invalid sectors per block %d, must be between %d and %d sectors", sectorsPerBlock, 2, 128) + 
case sectorsPerBlock < 1: + sectorsPerBlock = 2 + default: + userProvidedBlocksize = true + } + blocksize := uint32(sectorsPerBlock) * sectorsize32 + + // how many whole blocks is that? + numblocks := size / int64(blocksize) + + // recalculate if it was not user provided + if !userProvidedBlocksize { + sectorsPerBlockR, blocksizeR, numblocksR := recalculateBlocksize(numblocks, size) + _, blocksize, numblocks = uint8(sectorsPerBlockR), blocksizeR, numblocksR + } + + // how many blocks in each block group (and therefore how many block groups) + // if not provided, by default it is 8*blocksize (in bytes) + blocksPerGroup := p.BlocksPerGroup + switch { + case blocksPerGroup <= 0: + blocksPerGroup = blocksize * 8 + case blocksPerGroup < minBlocksPerGroup: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be at least %d", blocksPerGroup, minBlocksPerGroup) + case blocksPerGroup > 8*blocksize: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be no larger than 8*blocksize of %d", blocksPerGroup, blocksize) + case blocksPerGroup%8 != 0: + return nil, fmt.Errorf("invalid number of blocks per group %d, must be divisible by 8", blocksPerGroup) + } + + // how many block groups do we have? 
+ blockGroups := numblocks / int64(blocksPerGroup) + + // track how many free blocks we have + freeBlocks := numblocks + + clusterSize := p.ClusterSize + + // use our inode ratio to determine how many inodes we should have + inodeRatio := p.InodeRatio + if inodeRatio <= 0 { + inodeRatio = DefaultInodeRatio + } + if inodeRatio < int64(blocksize) { + inodeRatio = int64(blocksize) + } + if inodeRatio < clusterSize { + inodeRatio = clusterSize + } + + inodeCount := p.InodeCount + switch { + case inodeCount <= 0: + // calculate how many inodes are needed + inodeCount64 := (numblocks * int64(blocksize)) / inodeRatio + if uint64(inodeCount64) > max32Num { + return nil, fmt.Errorf("requested %d inodes, greater than max %d", inodeCount64, max32Num) + } + inodeCount = uint32(inodeCount64) + case uint64(inodeCount) > max32Num: + return nil, fmt.Errorf("requested %d inodes, greater than max %d", inodeCount, max32Num) + } + + inodesPerGroup := int64(inodeCount) / blockGroups + + // track how many free inodes we have + freeInodes := inodeCount + + // which blocks have superblock and GDT? + var ( + backupSuperblocks []int64 + backupSuperblockGroupsSparse [2]uint32 + ) + // 0 - primary + // ?? - backups + switch p.SparseSuperVersion { + case 2: + // backups in first and last block group + backupSuperblockGroupsSparse = [2]uint32{0, uint32(blockGroups) - 1} + backupSuperblocks = []int64{0, 1, blockGroups - 1} + default: + backupSuperblockGroups := calculateBackupSuperblockGroups(blockGroups) + backupSuperblocks = []int64{0} + for _, bg := range backupSuperblockGroups { + backupSuperblocks = append(backupSuperblocks, bg*int64(blocksPerGroup)) + } + } + + freeBlocks -= int64(len(backupSuperblocks)) + + var firstDataBlock uint32 + if blocksize == 1024 { + firstDataBlock = 1 + } + + /* + size calculations + we have the total size of the disk from `size uint64` + we have the sectorsize fixed at SectorSize512 + + what do we need to determine or calculate? 
+ - block size + - number of blocks + - number of block groups + - block groups for superblock and gdt backups + - in each block group: + - number of blocks in gdt + - number of reserved blocks in gdt + - number of blocks in inode table + - number of data blocks + + config info: + + [defaults] + base_features = sparse_super,large_file,filetype,resize_inode,dir_index,ext_attr + default_mntopts = acl,user_xattr + enable_periodic_fsck = 0 + blocksize = 4096 + inode_size = 256 + inode_ratio = 16384 + + [fs_types] + ext3 = { + features = has_journal + } + ext4 = { + features = has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize + inode_size = 256 + } + ext4dev = { + features = has_journal,extent,huge_file,flex_bg,uninit_bg,inline_data,64bit,dir_nlink,extra_isize + inode_size = 256 + options = test_fs=1 + } + small = { + blocksize = 1024 + inode_size = 128 + inode_ratio = 4096 + } + floppy = { + blocksize = 1024 + inode_size = 128 + inode_ratio = 8192 + } + big = { + inode_ratio = 32768 + } + huge = { + inode_ratio = 65536 + } + news = { + inode_ratio = 4096 + } + largefile = { + inode_ratio = 1048576 + blocksize = -1 + } + largefile4 = { + inode_ratio = 4194304 + blocksize = -1 + } + hurd = { + blocksize = 4096 + inode_size = 128 + } + */ + + // allocate root directory, single inode + freeInodes-- + + // how many reserved blocks? + reservedBlocksPercent := p.ReservedBlocksPercent + if reservedBlocksPercent <= 0 { + reservedBlocksPercent = DefaultReservedBlocksPercent + } + + // are checksums enabled? 
+ gdtChecksumType := gdtChecksumNone + if p.Checksum { + gdtChecksumType = gdtChecksumMetadata + } + + // we do not yet support bigalloc + var clustersPerGroup = blocksPerGroup + + // inodesPerGroup: once we know how many inodes per group, and how many groups + // we will have the total inode count + + volumeName := p.VolumeName + if volumeName == "" { + volumeName = DefaultVolumeName + } + + fflags := defaultFeatureFlags + for _, flagopt := range p.Features { + flagopt(&fflags) + } + + mflags := defaultMiscFlags + + // generate hash seed + hashSeed := uuid.NewV4() + hashSeedBytes := hashSeed.Bytes() + htreeSeed := make([]uint32, 0, 4) + htreeSeed = append(htreeSeed, + binary.LittleEndian.Uint32(hashSeedBytes[:4]), + binary.LittleEndian.Uint32(hashSeedBytes[4:8]), + binary.LittleEndian.Uint32(hashSeedBytes[8:12]), + binary.LittleEndian.Uint32(hashSeedBytes[12:16]), + ) + + // create a UUID for the journal + journalSuperblockUUID := uuid.NewV4() + + // group descriptor size could be 32 or 64, depending on option + var gdSize uint16 + if fflags.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + + var firstMetaBG uint32 + if fflags.metaBlockGroups { + return nil, fmt.Errorf("meta block groups not yet supported") + } + + // calculate the maximum number of block groups + // maxBlockGroups = (maxFSSize) / (blocksPerGroup * blocksize) + var ( + maxBlockGroups uint64 + ) + if fflags.fs64Bit { + maxBlockGroups = maxFilesystemSize64Bit / (uint64(blocksPerGroup) * uint64(blocksize)) + } else { + maxBlockGroups = maxFilesystemSize32Bit / (uint64(blocksPerGroup) * uint64(blocksize)) + } + reservedGDTBlocks := maxBlockGroups * 32 / maxBlockGroups + if reservedGDTBlocks > math.MaxUint16 { + return nil, fmt.Errorf("too many reserved blocks calculated for group descriptor table") + } + + var ( + journalDeviceNumber uint32 + err error + ) + if fflags.separateJournalDevice && p.JournalDevice != "" { + journalDeviceNumber, err = journalDevice(p.JournalDevice) + if err != nil { + 
return nil, fmt.Errorf("unable to get journal device: %w", err) + } + } + + // get default mount options + mountOptions := defaultMountOptionsFromOpts(p.DefaultMountOpts) + + // initial KB written. This must be adjusted over time to include: + // - superblock itself (1KB bytes) + // - GDT + // - block bitmap (1KB per block group) + // - inode bitmap (1KB per block group) + // - inode tables (inodes per block group * bytes per inode) + // - root directory + + // for now, we just make it 1024 = 1 KB + initialKB := 1024 + + // only set a project quota inode if the feature was enabled + var projectQuotaInode uint32 + if fflags.projectQuotas { + projectQuotaInode = lostFoundInode + 1 + freeInodes-- + } + + // how many log groups per flex group? Depends on if we have flex groups + logGroupsPerFlex := 0 + if fflags.flexBlockGroups { + logGroupsPerFlex = defaultLogGroupsPerFlex + if p.LogFlexBlockGroups > 0 { + logGroupsPerFlex = p.LogFlexBlockGroups + } + } + + // create the superblock - MUST ADD IN OPTIONS + now, epoch := time.Now(), time.Unix(0, 0) + sb := superblock{ + inodeCount: inodeCount, + blockCount: uint64(numblocks), + reservedBlocks: uint64(reservedBlocksPercent) / 100 * uint64(numblocks), + freeBlocks: uint64(freeBlocks), + freeInodes: freeInodes, + firstDataBlock: firstDataBlock, + blockSize: blocksize, + clusterSize: uint64(clusterSize), + blocksPerGroup: blocksPerGroup, + clustersPerGroup: clustersPerGroup, + inodesPerGroup: uint32(inodesPerGroup), + mountTime: now, + writeTime: now, + mountCount: 0, + mountsToFsck: 0, + filesystemState: fsStateCleanlyUnmounted, + errorBehaviour: errorsContinue, + minorRevision: 0, + lastCheck: now, + checkInterval: 0, + creatorOS: osLinux, + revisionLevel: 1, + reservedBlocksDefaultUID: 0, + reservedBlocksDefaultGID: 0, + firstNonReservedInode: firstNonReservedInode, + inodeSize: uint16(DefaultInodeSize), + blockGroup: 0, + features: fflags, + uuid: fsuuid, + volumeLabel: volumeName, + lastMountedDirectory: "/", + 
algorithmUsageBitmap: 0, // not used in Linux e2fsprogs + preallocationBlocks: 0, // not used in Linux e2fsprogs + preallocationDirectoryBlocks: 0, // not used in Linux e2fsprogs + reservedGDTBlocks: uint16(reservedGDTBlocks), + journalSuperblockUUID: &journalSuperblockUUID, + journalInode: journalInode, + journalDeviceNumber: journalDeviceNumber, + orphanedInodesStart: 0, + hashTreeSeed: htreeSeed, + hashVersion: hashHalfMD4, + groupDescriptorSize: gdSize, + defaultMountOptions: *mountOptions, + firstMetablockGroup: firstMetaBG, + mkfsTime: now, + journalBackup: nil, + // 64-bit mode features + inodeMinBytes: minInodeExtraSize, + inodeReserveBytes: wantInodeExtraSize, + miscFlags: mflags, + raidStride: 0, + multiMountPreventionInterval: 0, + multiMountProtectionBlock: 0, + raidStripeWidth: 0, + checksumType: checksumType, + totalKBWritten: uint64(initialKB), + errorCount: 0, + errorFirstTime: epoch, + errorFirstInode: 0, + errorFirstBlock: 0, + errorFirstFunction: "", + errorFirstLine: 0, + errorLastTime: epoch, + errorLastInode: 0, + errorLastLine: 0, + errorLastBlock: 0, + errorLastFunction: "", + mountOptions: "", // no mount options until it is mounted + encryptionAlgorithms: nil, // no encryption algorithm to start + encryptionSalt: nil, + backupSuperblockBlockGroups: backupSuperblockGroupsSparse, + lostFoundInode: lostFoundInode, + overheadBlocks: 0, + checksumSeed: crc32c(fsuuid.Bytes()), + snapshotInodeNumber: 0, + snapshotID: 0, + snapshotReservedBlocks: 0, + snapshotStartInode: 0, + userQuotaInode: userQuotaInode, + groupQuotaInode: groupQuotaInode, + projectQuotaInode: projectQuotaInode, + logGroupsPerFlex: uint64(logGroupsPerFlex), + } + gdt := groupDescriptors{} + + b, err := sb.toBytes() + if err != nil { + return nil, fmt.Errorf("error converting Superblock to bytes: %v", err) + } + + g := gdt.toBytes(gdtChecksumType, fsuuid.Bytes()) + // how big should the GDT be? 
+ gdSize = groupDescriptorSize + if sb.features.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + gdtSize := int64(gdSize) * numblocks + // write the superblock and GDT to the various locations on disk + for _, bg := range backupSuperblocks { + block := bg * int64(blocksPerGroup) + blockStart := block * int64(blocksize) + // allow that the first one requires an offset + incr := int64(0) + if block == 0 { + incr = int64(SectorSize512) * 2 + } + + // write the superblock + count, err := f.WriteAt(b, incr+blockStart+start) + if err != nil { + return nil, fmt.Errorf("error writing Superblock for block %d to disk: %v", block, err) + } + if count != int(SuperblockSize) { + return nil, fmt.Errorf("wrote %d bytes of Superblock for block %d to disk instead of expected %d", count, block, SuperblockSize) + } + + // write the GDT + count, err = f.WriteAt(g, incr+blockStart+int64(SuperblockSize)+start) + if err != nil { + return nil, fmt.Errorf("error writing GDT for block %d to disk: %v", block, err) + } + if count != int(gdtSize) { + return nil, fmt.Errorf("wrote %d bytes of GDT for block %d to disk instead of expected %d", count, block, gdtSize) + } + } + + // create root directory + // there is nothing in there + return &FileSystem{ + bootSector: []byte{}, + superblock: &sb, + groupDescriptors: &gdt, + blockGroups: blockGroups, + size: size, + start: start, + file: f, + }, nil +} + +// Read reads a filesystem from a given disk. +// +// requires the util.File where to read the filesystem, size is the size of the filesystem in bytes, +// start is how far in bytes from the beginning of the util.File the filesystem is expected to begin, +// and blocksize is is the logical blocksize to use for creating the filesystem +// +// note that you are *not* required to read a filesystem on the entire disk. You could have a disk of size +// 20GB, and a small filesystem of size 50MB that begins 2GB into the disk. 
+// This is extremely useful for working with filesystems on disk partitions. +// +// Note, however, that it is much easier to do this using the higher-level APIs at github.com/diskfs/go-diskfs +// which allow you to work directly with partitions, rather than having to calculate (and hopefully not make any errors) +// where a partition starts and ends. +// +// If the provided blocksize is 0, it will use the default of 512 bytes. If it is any number other than 0 +// or 512, it will return an error. +func Read(file util.File, size, start, sectorsize int64) (*FileSystem, error) { + // blocksize must be <=0 or exactly SectorSize512 or error + if sectorsize != int64(SectorSize512) && sectorsize > 0 { + return nil, fmt.Errorf("sectorsize for ext4 must be either 512 bytes or 0, not %d", sectorsize) + } + // we do not check for ext4 max size because it is theoreticallt 1YB, which is bigger than an int64! Even 1ZB is! + if size < Ext4MinSize { + return nil, fmt.Errorf("requested size is smaller than minimum allowed ext4 size %d", Ext4MinSize) + } + + // load the information from the disk + // read boot sector code + bs := make([]byte, BootSectorSize) + n, err := file.ReadAt(bs, start) + if err != nil { + return nil, fmt.Errorf("could not read boot sector bytes from file: %v", err) + } + if uint16(n) < uint16(BootSectorSize) { + return nil, fmt.Errorf("only could read %d boot sector bytes from file", n) + } + + // read the superblock + // the superblock is one minimal block, i.e. 
2 sectors + superblockBytes := make([]byte, SuperblockSize) + n, err = file.ReadAt(superblockBytes, start+int64(BootSectorSize)) + if err != nil { + return nil, fmt.Errorf("could not read superblock bytes from file: %v", err) + } + if uint16(n) < uint16(SuperblockSize) { + return nil, fmt.Errorf("only could read %d superblock bytes from file", n) + } + + // convert the bytes into a superblock structure + sb, err := superblockFromBytes(superblockBytes) + if err != nil { + return nil, fmt.Errorf("could not interpret superblock data: %v", err) + } + + // now read the GDT + // how big should the GDT be? + numblocks := sb.blockCount + gdSize := groupDescriptorSize + if sb.features.fs64Bit { + gdSize = groupDescriptorSize64Bit + } + gdtSize := int64(gdSize) * int64(numblocks) + + gdtBytes := make([]byte, gdtSize) + n, err = file.ReadAt(gdtBytes, start+int64(BootSectorSize)+int64(SuperblockSize)) + if err != nil { + return nil, fmt.Errorf("could not read Group Descriptor Table bytes from file: %v", err) + } + if int64(n) < gdtSize { + return nil, fmt.Errorf("only could read %d Group Descriptor Table bytes from file instead of %d", n, gdtSize) + } + fsuuid := sb.uuid + if err != nil { + return nil, fmt.Errorf("could not convert uuid %s to uuid bytes: %v", sb.uuid, err) + } + // what kind of checksum are we using? 
+ var gdtChecksumTypeInFS gdtChecksumType + switch { + case sb.features.metadataChecksums: + gdtChecksumTypeInFS = gdtChecksumMetadata + case sb.features.gdtChecksum: + gdtChecksumTypeInFS = gdtChecksumGdt + default: + gdtChecksumTypeInFS = gdtChecksumNone + } + gdt, err := groupDescriptorsFromBytes(gdtBytes, sb.features.fs64Bit, fsuuid.Bytes(), gdtChecksumTypeInFS) + if err != nil { + return nil, fmt.Errorf("could not interpret Group Descriptor Table data: %v", err) + } + + return &FileSystem{ + bootSector: bs, + superblock: sb, + groupDescriptors: gdt, + blockGroups: int64(numblocks), + size: size, + start: start, + file: file, + }, nil +} + +// Type returns the type code for the filesystem. Always returns filesystem.TypeExt4 +func (fs *FileSystem) Type() filesystem.Type { + return filesystem.TypeExt4 +} + +// Mkdir make a directory at the given path. It is equivalent to `mkdir -p`, i.e. idempotent, in that: +// +// * It will make the entire tree path if it does not exist +// * It will not return an error if the path already exists +func (fs *FileSystem) Mkdir(p string) error { + _, _, err := fs.readDirWithMkdir(p, true) + // we are not interesting in returning the entries + return err +} + +// ReadDir return the contents of a given directory in a given filesystem. +// +// Returns a slice of os.FileInfo with all of the entries in the directory. +// +// Will return an error if the directory does not exist or is a regular file and not a directory +func (fs *FileSystem) ReadDir(p string) ([]os.FileInfo, error) { + _, entries, err := fs.readDirWithMkdir(p, false) + if err != nil { + return nil, fmt.Errorf("error reading directory %s: %v", p, err) + } + // once we have made it here, looping is done. 
We have found the final entry + // we need to return all of the file info + count := len(entries) + ret := make([]os.FileInfo, count) + for i, e := range entries { + in, err := fs.readInode(e.inode) + if err != nil { + return nil, fmt.Errorf("could not read inode %d at position %d in directory: %v", e.inode, i, err) + } + ret[i] = FileInfo{ + modTime: in.modifyTime, + name: e.filename, + size: int64(in.size), + isDir: e.fileType&fileTypeDirectory == fileTypeDirectory, + } + } + + return ret, nil +} + +// OpenFile returns an io.ReadWriter from which you can read the contents of a file +// or write contents to the file +// +// accepts normal os.OpenFile flags +// +// returns an error if the file does not exist +func (fs *FileSystem) OpenFile(p string, flag int) (filesystem.File, error) { + // get the path + dir := path.Dir(p) + filename := path.Base(p) + // if the dir == filename, then it is just / + if dir == filename { + return nil, fmt.Errorf("cannot open directory %s as file", p) + } + // get the directory entries + parentDir, entries, err := fs.readDirWithMkdir(dir, false) + if err != nil { + return nil, fmt.Errorf("could not read directory entries for %s", dir) + } + // we now know that the directory exists, see if the file exists + var targetEntry *directoryEntry + for _, e := range entries { + if e.filename != filename { + continue + } + // cannot do anything with directories + if e.fileType&fileTypeDirectory == fileTypeDirectory { + return nil, fmt.Errorf("cannot open directory %s as file", p) + } + // if we got this far, we have found the file + targetEntry = e + } + + // see if the file exists + // if the file does not exist, and is not opened for os.O_CREATE, return an error + if targetEntry == nil { + if flag&os.O_CREATE == 0 { + return nil, fmt.Errorf("target file %s does not exist and was not asked to create", p) + } + // else create it + targetEntry, err = fs.mkFile(parentDir, filename) + if err != nil { + return nil, fmt.Errorf("failed to create file 
%s: %v", p, err) + } + } + // get the inode + inodeNumber := targetEntry.inode + inode, err := fs.readInode(inodeNumber) + if err != nil { + return nil, fmt.Errorf("could not read inode number %d: %v", inodeNumber, err) + } + offset := int64(0) + if flag&os.O_APPEND == os.O_APPEND { + offset = int64(inode.size) + } + // when we open a file, we load the inode but also all of the extents + extents, err := inode.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("could not read extent tree for inode %d: %v", inodeNumber, err) + } + return &File{ + directoryEntry: targetEntry, + inode: inode, + isReadWrite: flag&os.O_RDWR != 0, + isAppend: flag&os.O_APPEND != 0, + offset: offset, + filesystem: fs, + extents: extents, + }, nil +} + +// Label read the volume label +func (fs *FileSystem) Label() string { + if fs.superblock == nil { + return "" + } + return fs.superblock.volumeLabel +} + +// SetLabel changes the label on the writable filesystem. Different file system may hav different +// length constraints. 
+// +//nolint:revive // will use params when read-write +func (fs *FileSystem) SetLabel(label string) error { + return errors.New("cannot set label, filesystem currently read-only") +} + +// readInode read a single inode from disk +func (fs *FileSystem) readInode(inodeNumber uint32) (*inode, error) { + sb := fs.superblock + inodeSize := sb.inodeSize + inodesPerGroup := sb.inodesPerGroup + // figure out which block group the inode is on + bg := (inodeNumber - 1) / inodesPerGroup + // read the group descriptor to find out the location of the inode table + gd := fs.groupDescriptors.descriptors[bg] + inodeTableBlock := gd.inodeTableLocation + inodeBytes := make([]byte, inodeSize) + // bytesStart is beginning byte for the inodeTableBlock + byteStart := inodeTableBlock * uint64(sb.blockSize) + // offsetInode is how many inodes in our inode is + offsetInode := (inodeNumber - 1) % inodesPerGroup + // offset is how many bytes in our inode is + offset := offsetInode * uint32(inodeSize) + read, err := fs.file.ReadAt(inodeBytes, int64(byteStart)+int64(offset)) + if err != nil { + return nil, fmt.Errorf("failed to read inode %d from offset %d of block %d from block group %d: %v", inodeNumber, offset, inodeTableBlock, bg, err) + } + if read != int(inodeSize) { + return nil, fmt.Errorf("read %d bytes for inode %d instead of inode size of %d", read, inodeNumber, inodeSize) + } + return inodeFromBytes(inodeBytes, sb, inodeNumber) +} + +// writeInode write a single inode to disk +func (fs *FileSystem) writeInode(i *inode) error { + sb := fs.superblock + inodeSize := sb.inodeSize + inodesPerGroup := sb.inodesPerGroup + // figure out which block group the inode is on + bg := (i.number - 1) / inodesPerGroup + // read the group descriptor to find out the location of the inode table + gd := fs.groupDescriptors.descriptors[bg] + inodeTableBlock := gd.inodeTableLocation + // bytesStart is beginning byte for the inodeTableBlock + // byteStart := inodeTableBlock * sb.blockSize + // 
offsetInode is how many inodes in our inode is + offsetInode := (i.number - 1) % inodesPerGroup + // offset is how many bytes in our inode is + offset := int64(offsetInode) * int64(inodeSize) + inodeBytes := i.toBytes(sb) + wrote, err := fs.file.WriteAt(inodeBytes, offset) + if err != nil { + return fmt.Errorf("failed to write inode %d at offset %d of block %d from block group %d: %v", i.number, offset, inodeTableBlock, bg, err) + } + if wrote != int(inodeSize) { + return fmt.Errorf("wrote %d bytes for inode %d instead of inode size of %d", wrote, i.number, inodeSize) + } + return nil +} + +// read directory entries for a given directory +func (fs *FileSystem) readDirectory(dir *Directory) ([]*directoryEntry, error) { + // read the inode for the directory + in, err := fs.readInode(dir.directoryEntry.inode) + if err != nil { + return nil, fmt.Errorf("could not read inode %d for directory: %v", dir.directoryEntry.inode, err) + } + // read the contents of the file across all blocks + b, err := fs.readFileBytes(in) + if err != nil { + return nil, fmt.Errorf("error reading file bytes for inode %d: %v", in.number, err) + } + + // convert into directory entries + return parseDirEntries(b) +} + +// readFileBytes read all of the bytes for an individual file pointed at by a given inode +// normally not very useful, but helpful when reading a directory +func (fs *FileSystem) readFileBytes(in *inode) ([]byte, error) { + // convert the extent tree into a sorted list of extents + extents, err := in.extents.blocks(fs) + if err != nil { + return nil, fmt.Errorf("unable to get blocks for inode %d: %w", in.number, err) + } + // walk through each one, gobbling up the bytes + b := make([]byte, fs.superblock.blockSize) + for i, e := range extents { + start := e.startingBlock * uint64(fs.superblock.blockSize) + count := uint64(e.count) * uint64(fs.superblock.blockSize) + b2 := make([]byte, count) + read, err := fs.file.ReadAt(b2, int64(start)) + if err != nil { + return nil, 
fmt.Errorf("failed to read bytes for extent %d: %v", i, err) + } + if read != int(count) { + return nil, fmt.Errorf("read %d bytes instead of %d for extent %d", read, count, i) + } + b = append(b, b2...) + } + return b, nil +} + +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) writeDirectoryEntries(dir *Directory) error { + return errors.New("unsupported write directory entries, currently read-only") +} + +// make a file +// +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) mkFile(parent *Directory, name string) (*directoryEntry, error) { + return nil, errors.New("unsupported to create a file, currently read-only") +} + +// readDirWithMkdir - walks down a directory tree to the last entry +// if it does not exist, it may or may not make it +func (fs *FileSystem) readDirWithMkdir(p string, doMake bool) (*Directory, []*directoryEntry, error) { + paths := splitPath(p) + + // walk down the directory tree until all paths have been walked or we cannot find something + // start with the root directory + var entries []*directoryEntry + currentDir := &Directory{ + directoryEntry: directoryEntry{ + inode: rootInode, + filename: "", + fileType: fileTypeDirectory, + }, + } + entries, err := fs.readDirectory(currentDir) + if err != nil { + return nil, nil, fmt.Errorf("failed to read directory %s", "/") + } + for i, subp := range paths { + // do we have an entry whose name is the same as this name? 
+ found := false + for _, e := range entries { + if e.filename != subp { + continue + } + if e.fileType != fileTypeDirectory { + return nil, nil, fmt.Errorf("cannot create directory at %s since it is a file", "/"+strings.Join(paths[0:i+1], "/")) + } + // the filename matches, and it is a subdirectory, so we can break after saving the directory entry, which contains the inode + found = true + currentDir = &Directory{ + directoryEntry: *e, + } + break + } + + // if not, either make it, retrieve its cluster and entries, and loop; + // or error out + if !found { + if doMake { + var subdirEntry *directoryEntry + subdirEntry, err = fs.mkSubdir(currentDir, subp) + if err != nil { + return nil, nil, fmt.Errorf("failed to create subdirectory %s", "/"+strings.Join(paths[0:i+1], "/")) + } + // write the directory entries to disk + err = fs.writeDirectoryEntries(currentDir) + if err != nil { + return nil, nil, fmt.Errorf("error writing directory entries to disk: %v", err) + } + // save where we are to search next + currentDir = &Directory{ + directoryEntry: *subdirEntry, + } + } else { + return nil, nil, fmt.Errorf("path %s not found", "/"+strings.Join(paths[0:i+1], "/")) + } + } + // get all of the entries in this directory + entries, err = fs.readDirectory(currentDir) + if err != nil { + return nil, nil, fmt.Errorf("failed to read directory %s", "/"+strings.Join(paths[0:i+1], "/")) + } + } + // once we have made it here, looping is done; we have found the final entry + return currentDir, entries, nil +} + +// readBlock read a single block from disk +func (fs *FileSystem) readBlock(blockNumber uint64) ([]byte, error) { + sb := fs.superblock + // bytesStart is beginning byte for the inodeTableBlock + byteStart := blockNumber * uint64(sb.blockSize) + blockBytes := make([]byte, sb.blockSize) + read, err := fs.file.ReadAt(blockBytes, int64(byteStart)) + if err != nil { + return nil, fmt.Errorf("failed to read block %d: %v", blockNumber, err) + } + if read != int(sb.blockSize) { + 
return nil, fmt.Errorf("read %d bytes for block %d instead of size of %d", read, blockNumber, sb.blockSize) + } + return blockBytes, nil +} + +func crc32c(b []byte) uint32 { + // Define the CRC32C table using the Castagnoli polynomial + crc32cTable := crc32.MakeTable(crc32.Castagnoli) + + // Initialize the CRC32C calculation with the seed value 0xFFFFFFFF + seed := uint32(0xFFFFFFFF) + + // Compute the CRC32C checksum + return crc32.Update(seed, crc32cTable, b) +} + +// recalculate blocksize based on the existing number of blocks +// - 0 <= blocks < 3MM : floppy - blocksize = 1024 +// - 3MM <= blocks < 512MM : small - blocksize = 1024 +// - 512MM <= blocks < 4*1024*1024MM : default - blocksize = +// - 4*1024*1024MM <= blocks < 16*1024*1024MM : big - blocksize = +// - 16*1024*1024MM <= blocks : huge - blocksize = +// +// the original code from e2fsprogs https://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git/tree/misc/mke2fs.c +func recalculateBlocksize(numblocks, size int64) (sectorsPerBlock int, blocksize uint32, numBlocksAdjusted int64) { + var ( + million64 = int64(million) + sectorSize512 = uint32(SectorSize512) + ) + switch { + case 0 <= numblocks && numblocks < 3*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 3*million64 <= numblocks && numblocks < 512*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 512*million64 <= numblocks && numblocks < 4*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case 4*1024*1024*million64 <= numblocks && numblocks < 16*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + case numblocks > 16*1024*1024*million64: + sectorsPerBlock = 2 + blocksize = 2 * sectorSize512 + } + return sectorsPerBlock, blocksize, size / int64(blocksize) +} + +// mkSubdir make a subdirectory of a given name inside the parent +// +//nolint:revive // params are unused because this still is read-only, but it will be read-write at some point +func (fs *FileSystem) 
mkSubdir(parent *Directory, name string) (*directoryEntry, error) { + return nil, errors.New("mksubdir not yet supported") +} diff --git a/filesystem/ext4/ext4.md b/filesystem/ext4/ext4.md new file mode 100644 index 00000000..26663db0 --- /dev/null +++ b/filesystem/ext4/ext4.md @@ -0,0 +1,285 @@ +# ext4 +This file describes the layout on disk of ext4. It is a living document and probably will be deleted rather than committed to git. + +The primary reference document is [here](https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Overview), while helpful examples are [here](https://digital-forensics.sans.org/blog/2017/06/07/understanding-ext4-part-6-directories) and [here](https://metebalci.com/blog/a-minimum-complete-tutorial-of-linux-ext4-file-system/). + +This [blog series](https://www.sans.org/blog/understanding-ext4-part-6-directories/) is super helpful. + +## Concepts + +* Sector: a section of 512 bytes +* Block: a contiguous group of sectors. Block size usually is either 4K (4096 bytes) or 1K (1024 bytes), i.e. 8 sectors or 2 sectors. Block size minimum is 1KB (2 sectors), max is 64KB (128 sectors). Each block is associated with exactly one file. A file may contain more than one block - e.g. if a file is larger than the size of a single block - but each block belongs to exactly one file. +* inode: metadata about a file or directory. Each inode contains metadata about exactly one file. The number of inodes in a system is identical to the number of blocks for 32-bit, or far fewer for 64-bit. +* Block group: a contiguous group of blocks. Each block group is (`8*block_size_in_bytes`) blocks. So if block size is 4K, or 4096 bytes, then a block group is `8*4096` = 32,768 blocks, each of size 4096 bytes, for a block group of 128MB. If block size is 1K, a block group is 8192 blocks, or 8MB. +* 64-bit feature: ext4 filesystems normally uses 32-bit, which means the maximum blocks per filesystem is 2^32. 
If the 64-bit feature is enabled, then the maximum blocks per filesystem is 2^64. +* Superblock: A block that contains information about the entire filesystem. Exists in block group 0 and sometimes is backed up to other block groups. The superblock contains information about the filesystem as a whole: inode size, block size, last mount time, etc. +* Block Group Descriptor: Block Group Descriptors contain information about each block group: start block, end block, inodes, etc. One Descriptor per Group. But it is stored next to the Superblock (and backups), not with each Group. +* Extent: an extent is a contiguous group of blocks. Extents are used to store files. Extents are mapped beginning with the inode, and provide the way of getting from an inode to the blocks that contain the file's data. + + +### Block Group + +Each block group is built in the following order. There is a distinction between Group 0 - the first one +in the filesystem - and all others. + +Block groups come in one of several types. It isn't necessary to list all of them here. The key elements are as follows. + +Block 0: + +1. Padding: 1024 bytes, used for boot sector + +Block 0 (above 1024 bytes, if blocksize >1024) or Block 1; all backup blocks: + +2. Superblock: One block +3. Group Descriptors: Many blocks +4. Reserved GDT Blocks: Many blocks, reserved in case we need to expand to more Group Descriptors in the future + +All blocks: + +5. Data block bitmap: 1 block. One bit per block in the block group. Set to 1 if a data block is in use, 0 if not. +6. inode bitmap: 1 block. One bit per inode in the block group. Set to 1 if an inode is in use, 0 if not. +7. inode table: many blocks. Calculated by `(inodes_per_group)*(size_of_inode)`. Remember that `inodes_per_group` = `blocks_per_group` = `8*block_size_in_bytes`. The original `size_of_inode` in ext2 was 128 bytes. In ext4 it uses 156 bytes, but is stored in 256 bytes of space, so `inode_size_in_bytes` = 256 bytes. +8. 
Data blocks: all of the rest of the blocks in the block group + +The variant on the above is with Flexible Block Groups. If flexbg is enabled, then block groups are grouped together, normally +groups of 16 (but the actual number is in the superblock). The data block bitmap, inode bitmap and inode table are +in the first block group for each flexible block group. + +This means you can have all sorts of combinations: + +* block that is both first in a block group (contains block bitmap, inode bitmap, inode table) and superblock/backup (contains superblock, GDT, reserved GDT blocks) +* block that is first in a block group (block bitmap, inode bitmap, inode table) but not first in a block group or Flex BG +* block that is superblock/backup (superblock, GDT, reserved GDT blocks) but not first in a block group or Flex BG +* neither of the above (contains just data blocks) + +Summary: block bitmap, inode bitmap and inode table are in the first block in a blockgroup or Flex BG, which is a consistent +number. Superblock backups are in specific blocks, calculated by being a block number that is a power of 3, 5 or 7. + +## How to + +Different actions. These all will be replaced by actual code. Things we need to be able to do: + +* walk the tree to a particular directory or file +* inode to data blocks +* read directory entries +* create a new directory entry +* read contents of a file +* write contents to a file + +### Walk the Tree + +In order to get to any particular file or directory in the ext4 filesystem, you need to "walk the tree". +For example, say you want to read the contents of directory `/usr/local/bin/`. + +1. Find the inode of the root directory in the inode table. This **always** is inode 2. +1. Read inode of the root directory to get the data blocks that contain the contents of the root directory. See [inode to data blocks](#inode-to-data-blocks). +1. Read the directory entries in the data blocks to get the names of the files and directories in root. 
This can be linear or hash. + * linear: read sequentially until you find the one whose name matches the desired subdirectory, for example `usr` + * hash: hash the name and use that to get the correct location +1. Using the matched directory entry, get the inode number for that subdirectory. +1. Use the superblock to read how many inodes are in each block group, e.g. 8144 +1. Calculate which block group contains the inode you are looking for. Using the above example, 0-8143 are in group 0, 8144-16287 are in group 1, etc. +1. Read the inode of that subdirectory in the inode table of the given block group to get the data blocks that contain the contents of that directory. +1. Repeat until you have read the data blocks for the desired entry. + +### Inode to Data Blocks + +Start with the inode + +1. Read the inode +1. Read the `i_block` value, 60 bytes at location 0x28 (= 40) +1. The first 12 bytes are an extent header: + * magic number 0xf30a (little endian) - 2 bytes + * number of entries following the header - 2 bytes - in the inode, always 1, 2, 3, or 4 + * maximum number of entries that could follow the header - 2 bytes - in the inode, always 4 + * depth of this node in the extent tree, where 0 = leaf, parent to that is 1, etc. - 2 bytes + * generation (unused) - 4 bytes +1. Read the entries that follow. + +If the data inside the inode is a leaf node (header depth = 0), then the entries will be leaf entries of 12 bytes: + +* first block in the file that this extent covers - 4 bytes +* number of blocks in this extent - 2 bytes - If the value of this field is <= 32768, the extent is initialized. If the value of the field is > 32768, the extent is uninitialized and the actual extent length is ee_len - 32768. Therefore, the maximum length of a initialized extent is 32768 blocks, and the maximum length of an uninitialized extent is 32767. 
+* upper 16 bits of the block location - 2 bytes
+* lower 32 bits of the block location - 4 bytes
+
+For example, if a file has 1,000 blocks, and a particular extent entry points to blocks 100-299 of the file, and it starts
+at filesystem block 10000, then the entry will be:
+
+* 100 (4 bytes)
+* 200 (2 bytes) - 200 <= 32768, so this extent is initialized
+* 0 (2 bytes)
+* 10000 (4 bytes)
+
+If the data inside the inode is an internal node (header depth > 0), then the entries will be internal entries of 12 bytes:
+
+* first file block that this extent and all its children cover - 4 bytes
+* lower 32 bits of the block number of the extent node on the next lower level - 4 bytes
+* upper 16 bits of the block number of the extent node on the next lower level - 2 bytes
+* unused - 2 bytes
+
+For example, if a file has 10,000 blocks, covered in 15 extents, then there will be 15 level 0 extents, and 1 level 1 extent,
+and the 15 extents are stored in filesystem block 20000.
+
+The lower level 0 extent will look like our leaf node example above.
+The upper level 1 extent will look like:
+
+* 0 (4 bytes) - because this starts from file block 0
+* 20000 (4 bytes) - the block number of the extent node on the next lower level
+* 0 (2 bytes) - because the lower 4 bytes were enough to hold the block number
+
+You can find all of the blocks simply by looking at the root of the extent tree in the inode.
+
+* If the extents for the file are 4 or fewer, then the extent tree is stored in the inode itself.
+* If the extents for the file are more than 4, but enough to fit the extents in 1-4 blocks, then: + * level 0 extents are stored in a single separate block + * level 1 extents are stored in the inode, with up to 4 entries pointing to the level 0 extents blocks +* If the extents for the file are more than fit in 4 blocks, then: + * level 0 extents are stored in as many blocks as needed + * level 1 extents are stored in other blocks pointing to level 0 extent blocks + * level 2 extents - up to 4 - are stored in the inode + +Each of these is repeated upwards. The maximum at the top of the tree is 4, the maximum in each block is `(blocksize-12)/12`. +Because: + +- each block of extent nodes needs a header of 12 bytes +- each extent node is 12 bytes + +### Read Directory Entries +To read directory entries + +1. Walk the tree until you find the inode for the directory you want. +2. Read the data blocks pointed to by that inode, see [inode to data blocks](#inode-to-data-blocks). +3. Interpret the data blocks. + +The directory itself is just a single "file". It has an inode that indicates the file "length", which is the number of bytes that the listing takes up. + +There are two types of directories: Classic and Hash Tree. Classic are just linear, unsorted, unordered lists of files. They work fine for shorter lists, but large directories can be slow to traverse if they grow too large. Once the contents of the directory "file" will be larger than a single block, ext4 switches it to a Hash Tree Directory Entry. + +Which directory type it is - classical linear or hash tree - does not affect the inode, for which it is just a file, but the contents of the directory entry "file". You can tell if it is linear or hash tree by checking the inode flag `EXT4_INDEX_FL`. If it is set (i.e. `& 0x1000`), then it is a hash tree. + +#### Classic Directory Entry +Each directory entry is at most 263 bytes long. They are arranged in sequential order in the file. 
The contents are: + +* first four bytes are a `uint32` giving the inode number +* next 2 bytes give the length of the directory entry (max 263) +* next 1 byte gives the length of the file name (which could be calculated from the directory entry length...) +* next 1 byte gives type: unknown, file, directory, char device, block device, FIFO, socket, symlink +* next (up to 255) bytes contain chars with the file or directory name + +The above is for the second version of ext4 directory entry (`ext4_dir_entry_2`). The slightly older version (`ext4_dir_entry`) is similar, except it does not give the file type, which in any case is in the inode. Instead it uses 2 bytes for the file name length. + +#### Hash Tree Directory Entry +Entries in the block are structured as follows: + +* `.` and `..` are the first two entries, and are classic `ext4_dir_entry_2` +* Look in byte `0x1c` to find the hash algorithm +* take the desired file/subdirectory name (just the `basename`) and hash it, see [Calculating the hash value][Calculating the hash value] +* look in the root directory entry in the hashmap to find the relative block number. Note that the block number is relative to the block in the directory, not the filesystem or block group. +* Next step depends on the hash tree depth: + * Depth = 0: read directory entry from the given block. + * Depth > 0: use the block as another lookup table, repeating the steps above, until we come to the depth. +* Once we have the final leaf block given by the hash table, we just read the block sequentially; it will be full of classical directory entries linearly. + +When reading the hashmap, it may not match precisely. Instead, it will fit within a range. The hashmap is sorted by `>=` to `<`. 
So if the table has entries as follows: + +| Hash | Block | +| -------|-------| +| 0 | 1 | +| 100 | 25 | +| 300 | 16 | + +Then: + +* all hash values from `0`-`99` will be in block `1` +* all hash values from `100-299` will be in block `25` +* all hash values from `300` to infinite will be in block `16` + +##### Calculating the hash value + +The hashing uses one of several algorithms. Most commonly, it is Half MD4. + +MD4 gives a digest length of 128 bits = 16 bytes. + +The "half md4" algorithm is given by the transformation code +[here](https://elixir.bootlin.com/linux/v4.6/source/lib/halfmd4.c#L26). The result +of it is 4 bytes. Those 4 bytes are the input to the hash. + +### Create a Directory Entry + +To create a directory, you need to go through the following steps: + +1. "Walk the tree" to find the parent directory. E.g. if you are creating `/usr/local/foo`, then you need to walk the tree to get to the directory "file" for `/usr/local`. If the parent directory is just the root `/`, e.g. you are creating `/foo`, then you use the root directory, whose inode always is `2`. +2. Determine if the parent directory is classical linear or hash tree, by checking the flag `EXT4_INDEX_FL` in the parent directory's inode. + * if hash: + 1. find a block in the "directory" file with space to add a linear entry + 1. create and add the entry + 1. calculate the hash of the filename + 1. add the `hash:block_number` entry into the tree + 1. rebalance if needed + * if linear, create the entry: + * if adding one will not exceed the size for linear, write it and done + * if adding one will exceed the size for linear, convert to hash, then write it + +#### Hash Tree + +1. Calculate the hash of the new directory entry name +2. Determine which block in the parent directory "file" the new entry should live, based on the hash table. +3. Find the block. +4. Add a classical linear entry at the end of it. +5. Update the inode for the parent directory with the new file size. 
+
+If there is no room at the end of the block, you need to rebalance the hash tree. See below.
+
+#### Classical Linear
+
+1. Find the last block in the parent directory "file"
+   * if there is no room for another entry, extend the file size by another block, and update the inode for the file with the block map
+2. Add a classical linear directory entry at the end of it.
+3. Update the inode for the parent directory with the new file size, if any. E.g. if the entry fit within padding, there is no change in size.
+
+If this entry will cause the directory "file" to extend beyond a single block, convert to a hash tree. See below.
+
+### Rebalance Hash Tree
+
+Rebalancing the hash tree is rebalancing a btree, where the keys are the hash values.
+You only ever need to rebalance when you add or remove an entry.
+
+#### Adding an entry
+
+When adding an entry, you only ever need to rebalance the node to which you add it, and parents up to the root.
+
+1. Calculate the hash of the entry
+1. Determine the leaf node into which it should go
+1. If the leaf node has less than the maximum number of elements, add it and done
+1. If the leaf node has the maximum number of elements:
+   1. Add the new entry in the right place
+   1. Find the median
+   1. Move the median up to the parent node
+   1. If necessary, rebalance the parent node
+
+#### Removing an entry
+
+When removing an entry, you only ever need to rebalance the node from which you remove it, and parents up to the root.
+
+1. Calculate the hash of the entry
+1. Determine the leaf node in which it exists
+1. If the leaf node has more than the minimum number of elements, remove it and done
+1. If the leaf node has only the minimum number of elements:
+   1. Remove the entry
+   1. Borrow an element from a sibling node, or merge with a sibling if borrowing is not possible
+   1. If necessary, rebalance the parent node
+
+### Convert Classical Linear to Hash Tree
+
+
+### Read File Contents
+
+1. Walk the tree until you find the inode for the file you want.
+1. Find the data blocks for that inode, see [inode to data blocks](#inode-to-data-blocks). +1. Interpret the data blocks. + +### Create File + +### Write File Contents diff --git a/filesystem/ext4/extent.go b/filesystem/ext4/extent.go new file mode 100644 index 00000000..87fdef3a --- /dev/null +++ b/filesystem/ext4/extent.go @@ -0,0 +1,320 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" +) + +const ( + extentTreeHeaderLength int = 12 + extentTreeEntryLength int = 12 + extentHeaderSignature uint16 = 0xf30a + extentTreeMaxDepth int = 5 +) + +// extens a structure holding multiple extents +type extents []extent + +// extent a structure with information about a single contiguous run of blocks containing file data +type extent struct { + // fileBlock block number relative to the file. E.g. if the file is composed of 5 blocks, this could be 0-4 + fileBlock uint32 + // startingBlock the first block on disk that contains the data in this extent. E.g. if the file is made up of data from blocks 100-104 on the disk, this would be 100 + startingBlock uint64 + // count how many contiguous blocks are covered by this extent + count uint16 +} + +// equal if 2 extents are equal +// +//nolint:unused // useful function for future +func (e *extent) equal(a *extent) bool { + if (e == nil && a != nil) || (a == nil && e != nil) { + return false + } + if e == nil && a == nil { + return true + } + return *e == *a +} + +// blocks how many blocks are covered in the extents +// +//nolint:unused // usefule function for future +func (e extents) blocks() uint64 { + var count uint64 + for _, ext := range e { + count += uint64(ext.count) + } + return count +} + +// extentBlockFinder provides a way of finding the blocks on disk that represent the block range of a given file. +// Arguments are the starting and ending blocks in the file. Returns a slice of blocks to read on disk. +// These blocks are in order. 
For example, if you ask to read file blocks starting at 20 for a count of 25, then you might +// get a single fileToBlocks{block: 100, count: 25} if the file is contiguous on disk. Or you might get +// fileToBlocks{block: 100, count: 10}, fileToBlocks{block: 200, count: 15} if the file is fragmented on disk. +// The slice should be read in order. +type extentBlockFinder interface { + // findBlocks find the actual blocks for a range in the file, given the start block in the file and how many blocks + findBlocks(start, count uint64, fs *FileSystem) ([]uint64, error) + // blocks get all of the blocks for a file, in sequential order, essentially unravels the tree into a slice of extents + blocks(fs *FileSystem) (extents, error) + // toBytes convert this extentBlockFinder to bytes to be stored in a block or inode + toBytes() []byte +} + +var ( + _ extentBlockFinder = &extentInternalNode{} + _ extentBlockFinder = &extentLeafNode{} +) + +// extentNodeHeader represents the header of an extent node +type extentNodeHeader struct { + depth uint16 // the depth of tree below here; for leaf nodes, will be 0 + entries uint16 // number of entries + max uint16 // maximum number of entries allowed at this level + blockSize uint32 // block size for this tree +} + +func (e extentNodeHeader) toBytes() []byte { + b := make([]byte, 12) + binary.LittleEndian.PutUint16(b[0:2], extentHeaderSignature) + binary.LittleEndian.PutUint16(b[2:4], e.entries) + binary.LittleEndian.PutUint16(b[4:6], e.max) + binary.LittleEndian.PutUint16(b[6:8], e.depth) + return b +} + +// extentChildPtr represents a child pointer in an internal node of extents +// the child could be a leaf node or another internal node. We only would know +// after parsing diskBlock to see its header. 
+type extentChildPtr struct { + fileBlock uint32 // extents or children of this cover from file block fileBlock onwards + count uint32 // how many blocks are covered by this extent + diskBlock uint64 // block number where the children live +} + +// extentLeafNode represents a leaf node of extents +// it includes the information in the header and the extents (leaf nodes). +// By definition, this is a leaf node, so depth=0 +type extentLeafNode struct { + extentNodeHeader + extents extents // the actual extents +} + +// findBlocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. +func (e extentLeafNode) findBlocks(start, count uint64, _ *FileSystem) ([]uint64, error) { + var ret []uint64 + + // before anything, figure out which file block is the start and end of the desired range + end := start + count - 1 + + // we are at the bottom of the tree, so we can just return the extents + for _, ext := range e.extents { + extentStart := uint64(ext.fileBlock) + extentEnd := uint64(ext.fileBlock + uint32(ext.count) - 1) + + // Check if the extent does not overlap with the given block range + if extentEnd < start || extentStart > end { + continue + } + + // Calculate the overlapping range + overlapStart := max(start, extentStart) + overlapEnd := min(end, extentEnd) + + // Calculate the starting disk block for the overlap + diskBlockStart := ext.startingBlock + (overlapStart - extentStart) + + // Append the corresponding disk blocks to the result + for i := uint64(0); i <= overlapEnd-overlapStart; i++ { + ret = append(ret, diskBlockStart+i) + } + } + return ret, nil +} + +// blocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. 
+func (e extentLeafNode) blocks(_ *FileSystem) (extents, error) { + return e.extents[:], nil +} + +// toBytes convert the node to raw bytes to be stored, either in a block or in an inode +func (e extentLeafNode) toBytes() []byte { + // 12 byte header, 12 bytes per child + b := make([]byte, 12+12*e.max) + copy(b[0:12], e.extentNodeHeader.toBytes()) + + for i, ext := range e.extents { + base := (i + 1) * 12 + binary.LittleEndian.PutUint32(b[base:base+4], ext.fileBlock) + binary.LittleEndian.PutUint16(b[base+4:base+6], ext.count) + diskBlock := make([]byte, 8) + binary.LittleEndian.PutUint64(diskBlock, ext.startingBlock) + copy(b[base+6:base+8], diskBlock[4:6]) + copy(b[base+8:base+12], diskBlock[0:4]) + } + return b +} + +// extentInternalNode represents an internal node in a tree of extents +// it includes the information in the header and the internal nodes +// By definition, this is an internal node, so depth>0 +type extentInternalNode struct { + extentNodeHeader + children []*extentChildPtr // the children +} + +// findBlocks find the actual blocks for a range in the file. internal nodes need to read the filesystem to +// get the child nodes, so the FileSystem reference is used. +func (e extentInternalNode) findBlocks(start, count uint64, fs *FileSystem) ([]uint64, error) { + var ret []uint64 + + // before anything, figure out which file block is the start and end of the desired range + end := start + count - 1 + + // we are not depth 0, so we have children extent tree nodes. Figure out which ranges we are in. + // the hard part here is that each child has start but not end or count. You only know it from reading the next one. 
+ // So if the one we are looking at is in the range, we get it from the children, and keep going + for _, child := range e.children { + extentStart := uint64(child.fileBlock) + extentEnd := uint64(child.fileBlock + child.count - 1) + + // Check if the extent does not overlap with the given block range + if extentEnd < start || extentStart > end { + continue + } + + // read the extent block from the disk + b, err := fs.readBlock(child.diskBlock) + if err != nil { + return nil, err + } + ebf, err := parseExtents(b, e.blockSize, uint32(extentStart), uint32(extentEnd)) + if err != nil { + return nil, err + } + blocks, err := ebf.findBlocks(extentStart, uint64(child.count), fs) + if err != nil { + return nil, err + } + if len(blocks) > 0 { + ret = append(ret, blocks...) + } + } + return ret, nil +} + +// blocks find the actual blocks for a range in the file. leaf nodes already have all of the data inside, +// so the FileSystem reference is unused. +func (e extentInternalNode) blocks(fs *FileSystem) (extents, error) { + var ret extents + + // we are not depth 0, so we have children extent tree nodes. Walk the tree below us and find all of the blocks + for _, child := range e.children { + // read the extent block from the disk + b, err := fs.readBlock(child.diskBlock) + if err != nil { + return nil, err + } + ebf, err := parseExtents(b, e.blockSize, child.fileBlock, child.fileBlock+child.count-1) + if err != nil { + return nil, err + } + blocks, err := ebf.blocks(fs) + if err != nil { + return nil, err + } + if len(blocks) > 0 { + ret = append(ret, blocks...) 
+ } + } + return ret, nil +} + +// toBytes convert the node to raw bytes to be stored, either in a block or in an inode +func (e extentInternalNode) toBytes() []byte { + // 12 byte header, 12 bytes per child + b := make([]byte, 12+12*e.max) + copy(b[0:12], e.extentNodeHeader.toBytes()) + + for i, child := range e.children { + base := (i + 1) * 12 + binary.LittleEndian.PutUint32(b[base:base+4], child.fileBlock) + diskBlock := make([]byte, 8) + binary.LittleEndian.PutUint64(diskBlock, child.diskBlock) + copy(b[base+4:base+8], diskBlock[0:4]) + copy(b[base+8:base+10], diskBlock[4:6]) + } + return b +} + +// parseExtents takes bytes, parses them to find the actual extents or the next blocks down. +// It does not recurse down the tree, as we do not want to do that until we actually are ready +// to read those blocks. This is similar to how ext4 driver in the Linux kernel does it. +// totalBlocks is the total number of blocks covered in this given section of the extent tree. +func parseExtents(b []byte, blocksize, start, count uint32) (extentBlockFinder, error) { + var ret extentBlockFinder + // must have at least header and one entry + minLength := extentTreeHeaderLength + extentTreeEntryLength + if len(b) < minLength { + return nil, fmt.Errorf("cannot parse extent tree from %d bytes, minimum required %d", len(b), minLength) + } + // check magic signature + if binary.LittleEndian.Uint16(b[0:2]) != extentHeaderSignature { + return nil, fmt.Errorf("invalid extent tree signature: %x", b[0x0:0x2]) + } + e := extentNodeHeader{ + entries: binary.LittleEndian.Uint16(b[0x2:0x4]), + max: binary.LittleEndian.Uint16(b[0x4:0x6]), + depth: binary.LittleEndian.Uint16(b[0x6:0x8]), + blockSize: blocksize, + } + // b[0x8:0xc] is used for the generation by Lustre but not standard ext4, so we ignore + + // we have parsed the header, now read either the leaf entries or the intermediate nodes + switch e.depth { + case 0: + var leafNode extentLeafNode + // read the leaves + for i := 0; i < 
int(e.entries); i++ { + start := i*extentTreeEntryLength + extentTreeHeaderLength + diskBlock := make([]byte, 8) + copy(diskBlock[0:4], b[start+8:start+12]) + copy(diskBlock[4:6], b[start+6:start+8]) + leafNode.extents = append(leafNode.extents, extent{ + fileBlock: binary.LittleEndian.Uint32(b[start : start+4]), + count: binary.LittleEndian.Uint16(b[start+4 : start+6]), + startingBlock: binary.LittleEndian.Uint64(diskBlock), + }) + } + ret = leafNode + default: + var ( + internalNode extentInternalNode + ) + for i := 0; i < int(e.entries); i++ { + start := i*extentTreeEntryLength + extentTreeHeaderLength + diskBlock := make([]byte, 8) + copy(diskBlock[0:4], b[start+4:start+8]) + copy(diskBlock[4:6], b[start+8:start+10]) + ptr := &extentChildPtr{ + diskBlock: binary.LittleEndian.Uint64(diskBlock), + fileBlock: binary.LittleEndian.Uint32(b[start : start+4]), + } + internalNode.children = append(internalNode.children, ptr) + if i > 0 { + internalNode.children[i-1].count = ptr.fileBlock - internalNode.children[i-1].fileBlock + } + } + if len(internalNode.children) > 0 { + internalNode.children[len(internalNode.children)-1].count = start + count - internalNode.children[len(internalNode.children)-1].fileBlock + } + ret = internalNode + } + + return ret, nil +} diff --git a/filesystem/ext4/features.go b/filesystem/ext4/features.go new file mode 100644 index 00000000..cbba49e5 --- /dev/null +++ b/filesystem/ext4/features.go @@ -0,0 +1,433 @@ +package ext4 + +// featureFlags is a structure holding which flags are set - compatible, incompatible and read-only compatible +type featureFlags struct { + // compatible, incompatible, and compatibleReadOnly feature flags + directoryPreAllocate bool + imagicInodes bool + hasJournal bool + extendedAttributes bool + reservedGDTBlocksForExpansion bool + directoryIndices bool + lazyBlockGroup bool + excludeInode bool + excludeBitmap bool + sparseSuperBlockV2 bool + compression bool + directoryEntriesRecordFileType bool + recoveryNeeded 
bool + separateJournalDevice bool + metaBlockGroups bool + extents bool + fs64Bit bool + multipleMountProtection bool + flexBlockGroups bool + extendedAttributeInodes bool + dataInDirectoryEntries bool + metadataChecksumSeedInSuperblock bool + largeDirectory bool + dataInInode bool + encryptInodes bool + sparseSuperblock bool + largeFile bool + btreeDirectory bool + hugeFile bool + gdtChecksum bool + largeSubdirectoryCount bool + largeInodes bool + snapshot bool + quota bool + bigalloc bool + metadataChecksums bool + replicas bool + readOnly bool + projectQuotas bool +} + +func parseFeatureFlags(compatFlags, incompatFlags, roCompatFlags uint32) featureFlags { + f := featureFlags{ + directoryPreAllocate: compatFeatureDirectoryPreAllocate.included(compatFlags), + imagicInodes: compatFeatureImagicInodes.included(compatFlags), + hasJournal: compatFeatureHasJournal.included(compatFlags), + extendedAttributes: compatFeatureExtendedAttributes.included(compatFlags), + reservedGDTBlocksForExpansion: compatFeatureReservedGDTBlocksForExpansion.included(compatFlags), + directoryIndices: compatFeatureDirectoryIndices.included(compatFlags), + lazyBlockGroup: compatFeatureLazyBlockGroup.included(compatFlags), + excludeInode: compatFeatureExcludeInode.included(compatFlags), + excludeBitmap: compatFeatureExcludeBitmap.included(compatFlags), + sparseSuperBlockV2: compatFeatureSparseSuperBlockV2.included(compatFlags), + compression: incompatFeatureCompression.included(incompatFlags), + directoryEntriesRecordFileType: incompatFeatureDirectoryEntriesRecordFileType.included(incompatFlags), + recoveryNeeded: incompatFeatureRecoveryNeeded.included(incompatFlags), + separateJournalDevice: incompatFeatureSeparateJournalDevice.included(incompatFlags), + metaBlockGroups: incompatFeatureMetaBlockGroups.included(incompatFlags), + extents: incompatFeatureExtents.included(incompatFlags), + fs64Bit: incompatFeature64Bit.included(incompatFlags), + multipleMountProtection: 
incompatFeatureMultipleMountProtection.included(incompatFlags), + flexBlockGroups: incompatFeatureFlexBlockGroups.included(incompatFlags), + extendedAttributeInodes: incompatFeatureExtendedAttributeInodes.included(incompatFlags), + dataInDirectoryEntries: incompatFeatureDataInDirectoryEntries.included(incompatFlags), + metadataChecksumSeedInSuperblock: incompatFeatureMetadataChecksumSeedInSuperblock.included(incompatFlags), + largeDirectory: incompatFeatureLargeDirectory.included(incompatFlags), + dataInInode: incompatFeatureDataInInode.included(incompatFlags), + encryptInodes: incompatFeatureEncryptInodes.included(incompatFlags), + sparseSuperblock: roCompatFeatureSparseSuperblock.included(roCompatFlags), + largeFile: roCompatFeatureLargeFile.included(roCompatFlags), + btreeDirectory: roCompatFeatureBtreeDirectory.included(roCompatFlags), + hugeFile: roCompatFeatureHugeFile.included(roCompatFlags), + gdtChecksum: roCompatFeatureGDTChecksum.included(roCompatFlags), + largeSubdirectoryCount: roCompatFeatureLargeSubdirectoryCount.included(roCompatFlags), + largeInodes: roCompatFeatureLargeInodes.included(roCompatFlags), + snapshot: roCompatFeatureSnapshot.included(roCompatFlags), + quota: roCompatFeatureQuota.included(roCompatFlags), + bigalloc: roCompatFeatureBigalloc.included(roCompatFlags), + metadataChecksums: roCompatFeatureMetadataChecksums.included(roCompatFlags), + replicas: roCompatFeatureReplicas.included(roCompatFlags), + readOnly: roCompatFeatureReadOnly.included(roCompatFlags), + projectQuotas: roCompatFeatureProjectQuotas.included(roCompatFlags), + } + + return f +} + +//nolint:gocyclo // we know this has cyclomatic complexity, but not worth breaking apart +func (f *featureFlags) toInts() (compatFlags, incompatFlags, roCompatFlags uint32) { + // compatible flags + if f.directoryPreAllocate { + compatFlags |= uint32(compatFeatureDirectoryPreAllocate) + } + if f.imagicInodes { + compatFlags |= uint32(compatFeatureImagicInodes) + } + if f.hasJournal { + 
compatFlags |= uint32(compatFeatureHasJournal) + } + if f.extendedAttributes { + compatFlags |= uint32(compatFeatureExtendedAttributes) + } + if f.reservedGDTBlocksForExpansion { + compatFlags |= uint32(compatFeatureReservedGDTBlocksForExpansion) + } + if f.directoryIndices { + compatFlags |= uint32(compatFeatureDirectoryIndices) + } + if f.lazyBlockGroup { + compatFlags |= uint32(compatFeatureLazyBlockGroup) + } + if f.excludeInode { + compatFlags |= uint32(compatFeatureExcludeInode) + } + if f.excludeBitmap { + compatFlags |= uint32(compatFeatureExcludeBitmap) + } + if f.sparseSuperBlockV2 { + compatFlags |= uint32(compatFeatureSparseSuperBlockV2) + } + + // incompatible flags + if f.compression { + incompatFlags |= uint32(incompatFeatureCompression) + } + if f.directoryEntriesRecordFileType { + incompatFlags |= uint32(incompatFeatureDirectoryEntriesRecordFileType) + } + if f.recoveryNeeded { + incompatFlags |= uint32(incompatFeatureRecoveryNeeded) + } + if f.separateJournalDevice { + incompatFlags |= uint32(incompatFeatureSeparateJournalDevice) + } + if f.metaBlockGroups { + incompatFlags |= uint32(incompatFeatureMetaBlockGroups) + } + if f.extents { + incompatFlags |= uint32(incompatFeatureExtents) + } + if f.fs64Bit { + incompatFlags |= uint32(incompatFeature64Bit) + } + if f.multipleMountProtection { + incompatFlags |= uint32(incompatFeatureMultipleMountProtection) + } + if f.flexBlockGroups { + incompatFlags |= uint32(incompatFeatureFlexBlockGroups) + } + if f.extendedAttributeInodes { + incompatFlags |= uint32(incompatFeatureExtendedAttributeInodes) + } + if f.dataInDirectoryEntries { + incompatFlags |= uint32(incompatFeatureDataInDirectoryEntries) + } + if f.metadataChecksumSeedInSuperblock { + incompatFlags |= uint32(incompatFeatureMetadataChecksumSeedInSuperblock) + } + if f.largeDirectory { + incompatFlags |= uint32(incompatFeatureLargeDirectory) + } + if f.dataInInode { + incompatFlags |= uint32(incompatFeatureDataInInode) + } + if f.encryptInodes { + 
incompatFlags |= uint32(incompatFeatureEncryptInodes) + } + + // read only compatible flags + if f.sparseSuperblock { + roCompatFlags |= uint32(roCompatFeatureSparseSuperblock) + } + if f.largeFile { + roCompatFlags |= uint32(roCompatFeatureLargeFile) + } + if f.btreeDirectory { + roCompatFlags |= uint32(roCompatFeatureBtreeDirectory) + } + if f.hugeFile { + roCompatFlags |= uint32(roCompatFeatureHugeFile) + } + if f.gdtChecksum { + roCompatFlags |= uint32(roCompatFeatureGDTChecksum) + } + if f.largeSubdirectoryCount { + roCompatFlags |= uint32(roCompatFeatureLargeSubdirectoryCount) + } + if f.largeInodes { + roCompatFlags |= uint32(roCompatFeatureLargeInodes) + } + if f.snapshot { + roCompatFlags |= uint32(roCompatFeatureSnapshot) + } + if f.quota { + roCompatFlags |= uint32(roCompatFeatureQuota) + } + if f.bigalloc { + roCompatFlags |= uint32(roCompatFeatureBigalloc) + } + if f.metadataChecksums { + roCompatFlags |= uint32(roCompatFeatureMetadataChecksums) + } + if f.replicas { + roCompatFlags |= uint32(roCompatFeatureReplicas) + } + if f.readOnly { + roCompatFlags |= uint32(roCompatFeatureReadOnly) + } + if f.projectQuotas { + roCompatFlags |= uint32(roCompatFeatureProjectQuotas) + } + + return compatFlags, incompatFlags, roCompatFlags +} + +// default features +/* + base_features = sparse_super,large_file,filetype,resize_inode,dir_index,ext_attr + features = has_journal,extent,huge_file,flex_bg,uninit_bg,64bit,dir_nlink,extra_isize +*/ +var defaultFeatureFlags = featureFlags{ + largeFile: true, + hugeFile: true, + sparseSuperblock: true, + flexBlockGroups: true, + hasJournal: true, + extents: true, + fs64Bit: true, + extendedAttributes: true, +} + +type FeatureOpt func(*featureFlags) + +func WithFeatureDirectoryPreAllocate(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryPreAllocate = enable + } +} +func WithFeatureImagicInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.imagicInodes = enable + } +} +func 
WithFeatureHasJournal(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.hasJournal = enable + } +} +func WithFeatureExtendedAttributes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extendedAttributes = enable + } +} +func WithFeatureReservedGDTBlocksForExpansion(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.reservedGDTBlocksForExpansion = enable + } +} +func WithFeatureDirectoryIndices(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryIndices = enable + } +} +func WithFeatureLazyBlockGroup(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.lazyBlockGroup = enable + } +} +func WithFeatureExcludeInode(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.excludeInode = enable + } +} +func WithFeatureExcludeBitmap(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.excludeBitmap = enable + } +} +func WithFeatureSparseSuperBlockV2(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.sparseSuperBlockV2 = enable + } +} +func WithFeatureCompression(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.compression = enable + } +} +func WithFeatureDirectoryEntriesRecordFileType(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.directoryEntriesRecordFileType = enable + } +} +func WithFeatureRecoveryNeeded(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.recoveryNeeded = enable + } +} +func WithFeatureSeparateJournalDevice(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.separateJournalDevice = enable + } +} +func WithFeatureMetaBlockGroups(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metaBlockGroups = enable + } +} +func WithFeatureExtents(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extents = enable + } +} +func WithFeatureFS64Bit(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.fs64Bit = enable + } +} +func WithFeatureMultipleMountProtection(enable bool) FeatureOpt 
{ + return func(o *featureFlags) { + o.multipleMountProtection = enable + } +} +func WithFeatureFlexBlockGroups(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.flexBlockGroups = enable + } +} +func WithFeatureExtendedAttributeInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.extendedAttributeInodes = enable + } +} +func WithFeatureDataInDirectoryEntries(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.dataInDirectoryEntries = enable + } +} +func WithFeatureMetadataChecksumSeedInSuperblock(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metadataChecksumSeedInSuperblock = enable + } +} +func WithFeatureLargeDirectory(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeDirectory = enable + } +} +func WithFeatureDataInInode(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.dataInInode = enable + } +} +func WithFeatureEncryptInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.encryptInodes = enable + } +} +func WithFeatureSparseSuperblock(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.sparseSuperblock = enable + } +} +func WithFeatureLargeFile(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeFile = enable + } +} +func WithFeatureBTreeDirectory(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.btreeDirectory = enable + } +} +func WithFeatureHugeFile(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.hugeFile = enable + } +} +func WithFeatureGDTChecksum(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.gdtChecksum = enable + } +} +func WithFeatureLargeSubdirectoryCount(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeSubdirectoryCount = enable + } +} +func WithFeatureLargeInodes(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.largeInodes = enable + } +} +func WithFeatureSnapshot(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.snapshot = enable 
+ } +} +func WithFeatureQuota(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.quota = enable + } +} +func WithFeatureBigalloc(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.bigalloc = enable + } +} +func WithFeatureMetadataChecksums(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.metadataChecksums = enable + } +} +func WithFeatureReplicas(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.replicas = enable + } +} +func WithFeatureReadOnly(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.readOnly = enable + } +} +func WithFeatureProjectQuotas(enable bool) FeatureOpt { + return func(o *featureFlags) { + o.projectQuotas = enable + } +} diff --git a/filesystem/ext4/file.go b/filesystem/ext4/file.go new file mode 100644 index 00000000..ec0fc722 --- /dev/null +++ b/filesystem/ext4/file.go @@ -0,0 +1,102 @@ +package ext4 + +import ( + "errors" + "fmt" + "io" +) + +// File represents a single file in an ext4 filesystem +type File struct { + *directoryEntry + *inode + isReadWrite bool + isAppend bool + offset int64 + filesystem *FileSystem + extents extents +} + +// Read reads up to len(b) bytes from the File. +// It returns the number of bytes read and any error encountered. +// At end of file, Read returns 0, io.EOF +// reads from the last known offset in the file from last read or write +// use Seek() to set at a particular point +func (fl *File) Read(b []byte) (int, error) { + totalRead := 0 + fs := fl.filesystem + blocksize := int64(fs.superblock.blockSize) + + // find the starting block for it + // TODO: optimize this. It likely is not necessary to loop through all extents, we rather could + // keep a pointer as to what extent we are in. 
+ for _, e := range fl.extents { + // figure out which extents contain the data we are looking to read + // this is not fixed, as we adjust it as we read, so it is inside the extents loop + fileStartBlock := fl.offset / blocksize + locationInStartBlock := fl.offset % blocksize + if e.startingBlock > uint64(fileStartBlock) || (e.startingBlock+uint64(e.count)-1) < uint64(fileStartBlock) { + continue + } + // total number of bytes in the extent + extentSizeInBytes := int64(e.count) * blocksize + // starting point of the extent on the disk + extentOffset := int64(e.startingBlock)*blocksize + locationInStartBlock + // how much data is left in the extent beginning with the offset? + extentRemainder := extentSizeInBytes - locationInStartBlock + // how much can we read from this extent? + readLen := min(extentRemainder, int64(len(b))) + // read the data from the extent + _, err := fs.file.ReadAt(b[totalRead:totalRead+int(readLen)], extentOffset) + if err != nil { + return 0, err + } + // update the total read and the offset + totalRead += int(readLen) + fl.offset += readLen + // have we read everything we need to read? + if totalRead >= len(b) { + break + } + } + // did we reach the end? + if totalRead < len(b) || fl.offset >= int64(fl.size) { + return totalRead, io.EOF + } + return totalRead, nil +} + +// Write writes len(b) bytes to the File. +// It returns the number of bytes written and an error, if any. 
+// returns a non-nil error when n != len(b) +// writes to the last known offset in the file from last read or write +// use Seek() to set at a particular point +// +//nolint:revive // params not used because still read-only, will be used in the future when read-write +func (fl *File) Write(p []byte) (int, error) { + return 0, errors.New("not implemented") +} + +// Seek set the offset to a particular point in the file +func (fl *File) Seek(offset int64, whence int) (int64, error) { + newOffset := int64(0) + switch whence { + case io.SeekStart: + newOffset = offset + case io.SeekEnd: + newOffset = int64(fl.size) + offset + case io.SeekCurrent: + newOffset = fl.offset + offset + } + if newOffset < 0 { + return fl.offset, fmt.Errorf("cannot set offset %d before start of file", offset) + } + fl.offset = newOffset + return fl.offset, nil +} + +// Close close a file that is being read +func (fl *File) Close() error { + *fl = File{} + return nil +} diff --git a/filesystem/ext4/fileinfo.go b/filesystem/ext4/fileinfo.go new file mode 100644 index 00000000..4449c284 --- /dev/null +++ b/filesystem/ext4/fileinfo.go @@ -0,0 +1,48 @@ +package ext4 + +import ( + "os" + "time" +) + +// FileInfo represents the information for an individual file +// it fulfills os.FileInfo interface +type FileInfo struct { + modTime time.Time + mode os.FileMode + name string + size int64 + isDir bool +} + +// IsDir abbreviation for Mode().IsDir() +func (fi FileInfo) IsDir() bool { + return fi.isDir +} + +// ModTime modification time +func (fi FileInfo) ModTime() time.Time { + return fi.modTime +} + +// Mode returns file mode +func (fi FileInfo) Mode() os.FileMode { + return fi.mode +} + +// Name base name of the file +// +// will return the long name of the file. 
If none exists, returns the shortname and extension +func (fi FileInfo) Name() string { + return fi.name +} + +// Size length in bytes for regular files +func (fi FileInfo) Size() int64 { + return fi.size +} + +// Sys underlying data source - not supported yet and so will return nil +func (fi FileInfo) Sys() interface{} { + return nil +} diff --git a/filesystem/ext4/groupdescriptors.go b/filesystem/ext4/groupdescriptors.go new file mode 100644 index 00000000..40ce27d9 --- /dev/null +++ b/filesystem/ext4/groupdescriptors.go @@ -0,0 +1,304 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "hash/crc32" +) + +type blockGroupFlag uint16 +type gdtChecksumType uint8 + +func (b blockGroupFlag) included(a uint16) bool { + return a&uint16(b) == uint16(b) +} + +//nolint:unused // will be used in the future, not yet +func (g gdtChecksumType) included(a uint8) bool { + return a&uint8(g) == uint8(g) +} + +const ( + groupDescriptorSize uint16 = 32 + groupDescriptorSize64Bit uint16 = 64 + blockGroupFlagInodesUninitialized blockGroupFlag = 0x1 + blockGroupFlagBlockBitmapUninitialized blockGroupFlag = 0x2 + blockGroupFlagInodeTableZeroed blockGroupFlag = 0x3 + gdtChecksumNone gdtChecksumType = 0 + gdtChecksumGdt gdtChecksumType = 1 + gdtChecksumMetadata gdtChecksumType = 2 +) + +type blockGroupFlags struct { + inodesUninitialized bool + blockBitmapUninitialized bool + inodeTableZeroed bool +} + +// groupdescriptors is a structure holding all of the group descriptors for all of the block groups +type groupDescriptors struct { + descriptors []groupDescriptor +} + +// groupDescriptor is a structure holding the data about a single block group +type groupDescriptor struct { + blockBitmapLocation uint64 + inodeBitmapLocation uint64 + inodeTableLocation uint64 + freeBlocks uint32 + freeInodes uint32 + usedDirectories uint32 + flags blockGroupFlags + snapshotExclusionBitmapLocation uint64 + blockBitmapChecksum uint32 + inodeBitmapChecksum uint32 + unusedInodes uint32 + is64bit bool 
+	number                          uint64
+}
+
+// equal reports whether gds and a hold identical descriptor lists.
+func (gds *groupDescriptors) equal(a *groupDescriptors) bool {
+	if gds == nil && a == nil {
+		return true
+	}
+	if (gds == nil && a != nil) || (a == nil && gds != nil) || len(gds.descriptors) != len(a.descriptors) {
+		return false
+	}
+
+	// both not nil, same size, so compare them
+	for i, g := range gds.descriptors {
+		if g != a.descriptors[i] {
+			return false
+		}
+	}
+	// if we made it this far, all the same
+	return true
+}
+
+// groupDescriptorsFromBytes create a groupDescriptors struct from bytes
+func groupDescriptorsFromBytes(b []byte, is64bit bool, superblockUUID []byte, checksumType gdtChecksumType) (*groupDescriptors, error) {
+	gds := groupDescriptors{}
+
+	gdSize := int(groupDescriptorSize)
+	if is64bit {
+		gdSize = int(groupDescriptorSize64Bit)
+	}
+	count := len(b) / gdSize
+
+	// length 0 with capacity count: a non-zero starting length would leave
+	// zero-valued descriptors at the front of the slice once we append
+	gdSlice := make([]groupDescriptor, 0, count)
+
+	// go through them gdSize bytes at a time
+	for i := 0; i < count; i++ {
+		start := i * gdSize
+		end := start + gdSize
+		gd, err := groupDescriptorFromBytes(b[start:end], is64bit, i, checksumType, superblockUUID)
+		if err != nil || gd == nil {
+			return nil, fmt.Errorf("error creating group descriptor from bytes: %w", err)
+		}
+		gdSlice = append(gdSlice, *gd)
+	}
+	gds.descriptors = gdSlice
+
+	return &gds, nil
+}
+
+// toBytes returns groupDescriptors ready to be written to disk
+func (gds *groupDescriptors) toBytes(checksumType gdtChecksumType, superblockUUID []byte) []byte {
+	// length 0: appending to a slice that already has length would emit a
+	// block of zero bytes ahead of the first serialized descriptor
+	b := make([]byte, 0, len(gds.descriptors)*int(groupDescriptorSize))
+	for _, gd := range gds.descriptors {
+		b2 := gd.toBytes(checksumType, superblockUUID)
+		b = append(b, b2...)
+	}
+
+	return b
+}
+
+// groupDescriptorFromBytes create a groupDescriptor struct from bytes
+func groupDescriptorFromBytes(b []byte, is64bit bool, number int, checksumType gdtChecksumType, superblockUUID []byte) (*groupDescriptor, error) {
+	// block count, reserved block count and free blocks depends on whether the fs is 64-bit or not
+	blockBitmapLocation := make([]byte, 8)
+	inodeBitmapLocation := make([]byte, 8)
+	inodeTableLocation := make([]byte, 8)
+	freeBlocks := make([]byte, 4)
+	freeInodes := make([]byte, 4)
+	usedirectories := make([]byte, 4)
+	snapshotExclusionBitmapLocation := make([]byte, 8)
+	blockBitmapChecksum := make([]byte, 4)
+	inodeBitmapChecksum := make([]byte, 4)
+	unusedInodes := make([]byte, 4)
+
+	// low halves of each field live in the first 0x20 bytes of the descriptor
+	copy(blockBitmapLocation[0:4], b[0x0:0x4])
+	copy(inodeBitmapLocation[0:4], b[0x4:0x8])
+	copy(inodeTableLocation[0:4], b[0x8:0xc])
+	copy(freeBlocks[0:2], b[0xc:0xe])
+	copy(freeInodes[0:2], b[0xe:0x10])
+	copy(usedirectories[0:2], b[0x10:0x12])
+	copy(snapshotExclusionBitmapLocation[0:4], b[0x14:0x18])
+	copy(blockBitmapChecksum[0:2], b[0x18:0x1a])
+	copy(inodeBitmapChecksum[0:2], b[0x1a:0x1c])
+	copy(unusedInodes[0:2], b[0x1c:0x1e])
+
+	if is64bit {
+		// high halves exist only in a 64-bit sized descriptor
+		copy(blockBitmapLocation[4:8], b[0x20:0x24])
+		copy(inodeBitmapLocation[4:8], b[0x24:0x28])
+		copy(inodeTableLocation[4:8], b[0x28:0x2c])
+		copy(freeBlocks[2:4], b[0x2c:0x2e])
+		copy(freeInodes[2:4], b[0x2e:0x30])
+		copy(usedirectories[2:4], b[0x30:0x32])
+		copy(unusedInodes[2:4], b[0x32:0x34])
+		copy(snapshotExclusionBitmapLocation[4:8], b[0x34:0x38])
+		copy(blockBitmapChecksum[2:4], b[0x38:0x3a])
+		copy(inodeBitmapChecksum[2:4], b[0x3a:0x3c])
+	}
+
+	gdNumber := uint64(number)
+	// only bother with checking the checksum if it was not type none (pre-checksums)
+	if checksumType != gdtChecksumNone {
+		checksum := binary.LittleEndian.Uint16(b[0x1e:0x20])
+		actualChecksum := groupDescriptorChecksum(b[0x0:0x1e], superblockUUID, gdNumber, checksumType)
+		if checksum != actualChecksum {
+			return nil, fmt.Errorf("checksum mismatch, passed %x, actual %x", checksum, actualChecksum)
+		}
+	}
+
+	// NOTE: inodeBitmapLocation must be decoded from its own buffer, not the
+	// inodeBitmapChecksum buffer
+	gd := groupDescriptor{
+		is64bit:                         is64bit,
+		number:                          gdNumber,
+		blockBitmapLocation:             binary.LittleEndian.Uint64(blockBitmapLocation),
+		inodeBitmapLocation:             binary.LittleEndian.Uint64(inodeBitmapLocation),
+		inodeTableLocation:              binary.LittleEndian.Uint64(inodeTableLocation),
+		freeBlocks:                      binary.LittleEndian.Uint32(freeBlocks),
+		freeInodes:                      binary.LittleEndian.Uint32(freeInodes),
+		usedDirectories:                 binary.LittleEndian.Uint32(usedirectories),
+		snapshotExclusionBitmapLocation: binary.LittleEndian.Uint64(snapshotExclusionBitmapLocation),
+		blockBitmapChecksum:             binary.LittleEndian.Uint32(blockBitmapChecksum),
+		inodeBitmapChecksum:             binary.LittleEndian.Uint32(inodeBitmapChecksum),
+		unusedInodes:                    binary.LittleEndian.Uint32(unusedInodes),
+		flags:                           parseBlockGroupFlags(binary.LittleEndian.Uint16(b[0x12:0x14])),
+	}
+
+	return &gd, nil
+}
+
+// toBytes returns a groupDescriptor ready to be written to disk
+func (gd *groupDescriptor) toBytes(checksumType gdtChecksumType, superblockUUID []byte) []byte {
+	gdSize := groupDescriptorSize
+
+	// size of byte slice returned depends upon if using 64bit or 32bit filesystem
+	if gd.is64bit {
+		gdSize = groupDescriptorSize64Bit
+	}
+	b := make([]byte, gdSize)
+
+	blockBitmapLocation := make([]byte, 8)
+	inodeBitmapLocation := make([]byte, 8)
+	inodeTableLocation := make([]byte, 8)
+	freeBlocks := make([]byte, 4)
+	freeInodes := make([]byte, 4)
+	usedirectories := make([]byte, 4)
+	snapshotExclusionBitmapLocation := make([]byte, 8)
+	blockBitmapChecksum := make([]byte, 4)
+	inodeBitmapChecksum := make([]byte, 4)
+	unusedInodes := make([]byte, 4)
+
+	binary.LittleEndian.PutUint64(blockBitmapLocation, gd.blockBitmapLocation)
+	binary.LittleEndian.PutUint64(inodeTableLocation, gd.inodeTableLocation)
+	binary.LittleEndian.PutUint64(inodeBitmapLocation, gd.inodeBitmapLocation)
+	binary.LittleEndian.PutUint32(freeBlocks, gd.freeBlocks)
+	
binary.LittleEndian.PutUint32(freeInodes, gd.freeInodes) + binary.LittleEndian.PutUint32(usedirectories, gd.usedDirectories) + binary.LittleEndian.PutUint64(snapshotExclusionBitmapLocation, gd.snapshotExclusionBitmapLocation) + binary.LittleEndian.PutUint32(blockBitmapChecksum, gd.blockBitmapChecksum) + binary.LittleEndian.PutUint32(inodeBitmapChecksum, gd.inodeBitmapChecksum) + binary.LittleEndian.PutUint32(unusedInodes, gd.unusedInodes) + + // copy the lower 32 bytes in + copy(b[0x0:0x4], blockBitmapLocation[0:4]) + copy(b[0x4:0x8], inodeBitmapLocation[0:4]) + copy(b[0x8:0xc], inodeTableLocation[0:4]) + copy(b[0xc:0xe], freeBlocks[0:2]) + copy(b[0xe:0x10], freeInodes[0:2]) + copy(b[0x10:0x12], usedirectories[0:2]) + binary.LittleEndian.PutUint16(b[0x12:0x14], gd.flags.toInt()) + copy(b[0x14:0x18], snapshotExclusionBitmapLocation[0:4]) + copy(b[0x18:0x1a], blockBitmapChecksum[0:2]) + copy(b[0x1a:0x1c], inodeBitmapChecksum[0:2]) + copy(b[0x1c:0x1e], unusedInodes[0:2]) + + // now for the upper 32 bytes + if gd.is64bit { + copy(b[0x20:0x24], blockBitmapLocation[4:8]) + copy(b[0x24:0x28], inodeBitmapLocation[4:8]) + copy(b[0x28:0x2c], inodeTableLocation[4:8]) + copy(b[0x2c:0x2e], freeBlocks[2:4]) + copy(b[0x2e:0x30], freeInodes[2:4]) + copy(b[0x30:0x32], usedirectories[2:4]) + copy(b[0x32:0x34], unusedInodes[2:4]) + copy(b[0x34:0x38], snapshotExclusionBitmapLocation[4:8]) + copy(b[0x38:0x3a], blockBitmapChecksum[2:4]) + copy(b[0x3a:0x3c], inodeBitmapChecksum[2:4]) + } + + checksum := groupDescriptorChecksum(b[0x0:0x1e], superblockUUID, gd.number, checksumType) + binary.LittleEndian.PutUint16(b[0x1e:0x20], checksum) + + return b +} + +func parseBlockGroupFlags(flags uint16) blockGroupFlags { + f := blockGroupFlags{ + inodeTableZeroed: blockGroupFlagInodeTableZeroed.included(flags), + inodesUninitialized: blockGroupFlagInodesUninitialized.included(flags), + blockBitmapUninitialized: blockGroupFlagBlockBitmapUninitialized.included(flags), + } + + return f +} + +func (f 
*blockGroupFlags) toInt() uint16 { + var ( + flags uint16 + ) + + // compatible flags + if f.inodeTableZeroed { + flags |= uint16(blockGroupFlagInodeTableZeroed) + } + if f.inodesUninitialized { + flags |= uint16(blockGroupFlagInodesUninitialized) + } + if f.blockBitmapUninitialized { + flags |= uint16(blockGroupFlagBlockBitmapUninitialized) + } + return flags +} + +// groupDescriptorChecksum calculate the checksum for a block group descriptor +// NOTE: we are assuming that the block group number is uint64, but we do not know that to be true +// +// it might be uint32 or uint64, and it might be in BigEndian as opposed to LittleEndian +// just have to start with this and see +// we do know that the maximum number of block groups in 32-bit mode is 2^19, which must be uint32 +// and in 64-bit mode it is 2^51 which must be uint64 +// So we start with uint32 = [4]byte{} for regular mode and [8]byte{} for mod32 +func groupDescriptorChecksum(b, superblockUUID []byte, groupNumber uint64, checksumType gdtChecksumType) uint16 { + var checksum uint16 + var input = superblockUUID + + groupBytes := make([]byte, 8) + binary.LittleEndian.PutUint64(groupBytes, groupNumber) + switch checksumType { + case gdtChecksumNone: + checksum = 0 + case gdtChecksumMetadata: + input = append(input, groupBytes...) + input = append(input, b...) + crc32Table := crc32.MakeTable(crc32.Castagnoli) + checksum32 := crc32.Checksum(input, crc32Table) + checksum = uint16(checksum32) & 0xffff + case gdtChecksumGdt: + input = append(input, groupBytes[0:4]...) + input = append(input, b...) 
+ checksum = crc16(input) + } + return checksum +} diff --git a/filesystem/ext4/inode.go b/filesystem/ext4/inode.go new file mode 100644 index 00000000..a1098dd3 --- /dev/null +++ b/filesystem/ext4/inode.go @@ -0,0 +1,595 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "hash/crc32" + "time" +) + +type inodeFlag uint32 +type fileType uint16 + +func (i inodeFlag) included(a uint32) bool { + return a&uint32(i) == uint32(i) +} +func (f fileType) included(a uint16) bool { + return a&uint16(f) == uint16(f) +} + +const ( + ext2InodeSize uint16 = 128 + // minInodeSize is ext2 + the extra min 32 bytes in ext4 + minInodeExtraSize uint16 = 32 + wantInodeExtraSize uint16 = 128 + minInodeSize uint16 = ext2InodeSize + minInodeExtraSize + extentInodeMaxEntries int = 4 + inodeFlagSecureDeletion inodeFlag = 0x1 + inodeFlagPreserveForUndeletion inodeFlag = 0x2 + inodeFlagCompressed inodeFlag = 0x4 + inodeFlagSynchronous inodeFlag = 0x8 + inodeFlagImmutable inodeFlag = 0x10 + inodeFlagAppendOnly inodeFlag = 0x20 + inodeFlagNoDump inodeFlag = 0x40 + inodeFlagNoAccessTimeUpdate inodeFlag = 0x80 + inodeFlagDirtyCompressed inodeFlag = 0x100 + inodeFlagCompressedClusters inodeFlag = 0x200 + inodeFlagNoCompress inodeFlag = 0x400 + inodeFlagEncryptedInode inodeFlag = 0x800 + inodeFlagHashedDirectoryIndexes inodeFlag = 0x1000 + inodeFlagAFSMagicDirectory inodeFlag = 0x2000 + inodeFlagAlwaysJournal inodeFlag = 0x4000 + inodeFlagNoMergeTail inodeFlag = 0x8000 + inodeFlagSyncDirectoryData inodeFlag = 0x10000 + inodeFlagTopDirectory inodeFlag = 0x20000 + inodeFlagHugeFile inodeFlag = 0x40000 + inodeFlagUsesExtents inodeFlag = 0x80000 + inodeFlagExtendedAttributes inodeFlag = 0x200000 + inodeFlagBlocksPastEOF inodeFlag = 0x400000 + inodeFlagSnapshot inodeFlag = 0x1000000 + inodeFlagDeletingSnapshot inodeFlag = 0x4000000 + inodeFlagCompletedSnapshotShrink inodeFlag = 0x8000000 + inodeFlagInlineData inodeFlag = 0x10000000 + inodeFlagInheritProject inodeFlag = 0x20000000 + + fileTypeFifo 
fileType = 0x1000 + fileTypeCharacterDevice fileType = 0x2000 + fileTypeDirectory fileType = 0x4000 + fileTypeBlockDevice fileType = 0x6000 + fileTypeRegularFile fileType = 0x8000 + fileTypeSymbolicLink fileType = 0xA000 + fileTypeSocket fileType = 0xC000 + + filePermissionsOwnerExecute uint16 = 0x40 + filePermissionsOwnerWrite uint16 = 0x80 + filePermissionsOwnerRead uint16 = 0x100 + filePermissionsGroupExecute uint16 = 0x8 + filePermissionsGroupWrite uint16 = 0x10 + filePermissionsGroupRead uint16 = 0x20 + filePermissionsOtherExecute uint16 = 0x1 + filePermissionsOtherWrite uint16 = 0x2 + filePermissionsOtherRead uint16 = 0x4 +) + +// mountOptions is a structure holding flags for an inode +type inodeFlags struct { + secureDeletion bool + preserveForUndeletion bool + compressed bool + synchronous bool + immutable bool + appendOnly bool + noDump bool + noAccessTimeUpdate bool + dirtyCompressed bool + compressedClusters bool + noCompress bool + encryptedInode bool + hashedDirectoryIndexes bool + AFSMagicDirectory bool + alwaysJournal bool + noMergeTail bool + syncDirectoryData bool + topDirectory bool + hugeFile bool + usesExtents bool + extendedAttributes bool + blocksPastEOF bool + snapshot bool + deletingSnapshot bool + completedSnapshotShrink bool + inlineData bool + inheritProject bool +} + +type filePermissions struct { + read bool + write bool + execute bool +} + +// inode is a structure holding the data about an inode +type inode struct { + number uint32 + permissionsOther filePermissions + permissionsGroup filePermissions + permissionsOwner filePermissions + fileType fileType + owner uint32 + group uint32 + size uint64 + accessTime time.Time + changeTime time.Time + modifyTime time.Time + createTime time.Time + deletionTime uint32 + hardLinks uint16 + blocks uint64 + filesystemBlocks bool + flags *inodeFlags + version uint64 + nfsFileVersion uint32 + extendedAttributeBlock uint64 + inodeSize uint16 + project uint32 + extents extentBlockFinder +} + 
+//nolint:unused // will be used in the future, not yet +func (i *inode) equal(a *inode) bool { + if (i == nil && a != nil) || (a == nil && i != nil) { + return false + } + if i == nil && a == nil { + return true + } + return *i == *a +} + +// inodeFromBytes create an inode struct from bytes +func inodeFromBytes(b []byte, sb *superblock, number uint32) (*inode, error) { + // safely make sure it is the min size + if len(b) < int(minInodeSize) { + return nil, fmt.Errorf("inode data too short: %d bytes, must be min %d bytes", len(b), minInodeSize) + } + + // checksum before using the data + checksumBytes := make([]byte, 4) + + // checksum before using the data + copy(checksumBytes[0:2], b[0x7c:0x7e]) + copy(checksumBytes[2:4], b[0x82:0x84]) + // zero out checksum fields before calculating the checksum + b[0x7c] = 0 + b[0x7d] = 0 + b[0x82] = 0 + b[0x83] = 0 + + checksum := binary.LittleEndian.Uint32(checksumBytes) + actualChecksum := inodeChecksum(b, sb.uuid.Bytes(), number) + + if actualChecksum != checksum { + return nil, fmt.Errorf("checksum mismatch, on-disk %x vs calculated %x", checksum, actualChecksum) + } + + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + owner := make([]byte, 4) + fileSize := make([]byte, 8) + group := make([]byte, 4) + accessTime := make([]byte, 8) + changeTime := make([]byte, 8) + modifyTime := make([]byte, 8) + createTime := make([]byte, 8) + version := make([]byte, 8) + extendedAttributeBlock := make([]byte, 8) + + mode := binary.LittleEndian.Uint16(b[0x0:0x2]) + + copy(owner[0:2], b[0x2:0x4]) + copy(owner[2:4], b[0x78:0x7a]) + copy(group[0:2], b[0x18:0x20]) + copy(group[2:4], b[0x7a:0x7c]) + copy(fileSize[0:4], b[0x4:0x8]) + copy(fileSize[4:8], b[0x6c:0x70]) + copy(version[0:4], b[0x24:0x28]) + copy(version[4:8], b[0x98:0x9c]) + copy(extendedAttributeBlock[0:4], b[0x88:0x8c]) + copy(extendedAttributeBlock[4:6], b[0x76:0x78]) + + // get the the times + // the structure is as follows: + // 
original 32 bits (0:4) are seconds. Add (to the left) 2 more bits from the 32 + // the remaining 30 bites are nanoseconds + copy(accessTime[0:4], b[0x8:0xc]) + // take the two bits relevant and add to fifth byte + accessTime[4] = b[0x8c] & 0x3 + copy(changeTime[0:4], b[0xc:0x10]) + changeTime[4] = b[0x84] & 0x3 + copy(modifyTime[0:4], b[0x10:0x14]) + modifyTime[4] = b[0x88] & 0x3 + copy(createTime[0:4], b[0x90:0x94]) + createTime[4] = b[0x94] & 0x3 + + accessTimeSeconds := binary.LittleEndian.Uint64(accessTime) + changeTimeSeconds := binary.LittleEndian.Uint64(changeTime) + modifyTimeSeconds := binary.LittleEndian.Uint64(modifyTime) + createTimeSeconds := binary.LittleEndian.Uint64(createTime) + + // now get the nanoseconds by using the upper 30 bites + accessTimeNanoseconds := binary.LittleEndian.Uint32(b[0x8c:0x90]) >> 2 + changeTimeNanoseconds := binary.LittleEndian.Uint32(b[0x84:0x88]) >> 2 + modifyTimeNanoseconds := binary.LittleEndian.Uint32(b[0x88:0x8c]) >> 2 + createTimeNanoseconds := binary.LittleEndian.Uint32(b[0x94:0x98]) >> 2 + + flagsNum := binary.LittleEndian.Uint32(b[0x20:0x24]) + + flags := parseInodeFlags(flagsNum) + + blocksLow := binary.LittleEndian.Uint32(b[0x1c:0x20]) + blocksHigh := binary.LittleEndian.Uint16(b[0x74:0x76]) + var ( + blocks uint64 + filesystemBlocks bool + ) + + hugeFile := sb.features.hugeFile + switch { + case !hugeFile: + // just 512-byte blocks + blocks = uint64(blocksLow) + filesystemBlocks = false + case hugeFile && !flags.hugeFile: + // larger number of 512-byte blocks + blocks = uint64(blocksHigh)<<32 + uint64(blocksLow) + filesystemBlocks = false + default: + // larger number of filesystem blocks + blocks = uint64(blocksHigh)<<32 + uint64(blocksLow) + filesystemBlocks = true + } + + // parse the extent information in the inode to get the root of the extents tree + // we do not walk the entire tree, to get a slice of blocks for the file. 
+ // If we want to do that, we call the extentBlockFinder.toBlocks() method + extentInfo := make([]byte, 60) + copy(extentInfo, b[0x28:0x64]) + allExtents, err := parseExtents(extentInfo, sb.blockSize, 0, uint32(blocks)) + if err != nil { + return nil, fmt.Errorf("error parsing extent tree: %v", err) + } + + i := inode{ + number: number, + permissionsGroup: parseGroupPermissions(mode), + permissionsOwner: parseOwnerPermissions(mode), + permissionsOther: parseOtherPermissions(mode), + fileType: parseFileType(mode), + owner: binary.LittleEndian.Uint32(owner), + group: binary.LittleEndian.Uint32(group), + size: binary.LittleEndian.Uint64(fileSize), + hardLinks: binary.LittleEndian.Uint16(b[0x1a:0x1c]), + blocks: blocks, + filesystemBlocks: filesystemBlocks, + flags: &flags, + nfsFileVersion: binary.LittleEndian.Uint32(b[0x64:0x68]), + version: binary.LittleEndian.Uint64(version), + inodeSize: binary.LittleEndian.Uint16(b[0x80:0x82]) + minInodeSize, + deletionTime: binary.LittleEndian.Uint32(b[0x14:0x18]), + accessTime: time.Unix(int64(accessTimeSeconds), int64(accessTimeNanoseconds)), + changeTime: time.Unix(int64(changeTimeSeconds), int64(changeTimeNanoseconds)), + modifyTime: time.Unix(int64(modifyTimeSeconds), int64(modifyTimeNanoseconds)), + createTime: time.Unix(int64(createTimeSeconds), int64(createTimeNanoseconds)), + extendedAttributeBlock: binary.LittleEndian.Uint64(extendedAttributeBlock), + project: binary.LittleEndian.Uint32(b[0x9c:0x100]), + extents: allExtents, + } + + return &i, nil +} + +// toBytes returns an inode ready to be written to disk +// +//nolint:unused // will be used in the future, not yet +func (i *inode) toBytes(sb *superblock) []byte { + iSize := sb.inodeSize + + b := make([]byte, iSize) + + mode := make([]byte, 2) + owner := make([]byte, 4) + fileSize := make([]byte, 8) + group := make([]byte, 4) + accessTime := make([]byte, 8) + changeTime := make([]byte, 8) + modifyTime := make([]byte, 8) + createTime := make([]byte, 8) + version := 
make([]byte, 8) + extendedAttributeBlock := make([]byte, 8) + + binary.LittleEndian.PutUint16(mode, i.permissionsGroup.toGroupInt()|i.permissionsOther.toOtherInt()|i.permissionsOwner.toOwnerInt()|uint16(i.fileType)) + binary.LittleEndian.PutUint32(owner, i.owner) + binary.LittleEndian.PutUint32(group, i.group) + binary.LittleEndian.PutUint64(fileSize, i.size) + binary.LittleEndian.PutUint64(version, i.version) + binary.LittleEndian.PutUint64(extendedAttributeBlock, i.extendedAttributeBlock) + + // there is some odd stuff that ext4 does with nanoseconds. We might need this in the future. + // See https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Timestamps + // binary.LittleEndian.PutUint32(accessTime[4:8], (i.accessTimeNanoseconds<<2)&accessTime[4]) + binary.LittleEndian.PutUint64(accessTime, uint64(i.accessTime.Unix())) + binary.LittleEndian.PutUint32(accessTime[4:8], uint32(i.accessTime.Nanosecond())) + binary.LittleEndian.PutUint64(createTime, uint64(i.createTime.Unix())) + binary.LittleEndian.PutUint32(createTime[4:8], uint32(i.createTime.Nanosecond())) + binary.LittleEndian.PutUint64(changeTime, uint64(i.changeTime.Unix())) + binary.LittleEndian.PutUint32(changeTime[4:8], uint32(i.changeTime.Nanosecond())) + binary.LittleEndian.PutUint64(modifyTime, uint64(i.modifyTime.Unix())) + binary.LittleEndian.PutUint32(modifyTime[4:8], uint32(i.modifyTime.Nanosecond())) + + blocks := make([]byte, 8) + binary.LittleEndian.PutUint64(blocks, i.blocks) + + copy(b[0x0:0x2], mode) + copy(b[0x2:0x4], owner[0:2]) + copy(b[0x4:0x8], fileSize[0:4]) + copy(b[0x8:0xc], accessTime[0:4]) + copy(b[0xc:0x10], changeTime[0:4]) + copy(b[0x10:0x14], modifyTime[0:4]) + + binary.LittleEndian.PutUint32(b[0x14:0x18], i.deletionTime) + copy(b[0x18:0x1a], group[0:2]) + binary.LittleEndian.PutUint16(b[0x1a:0x1c], i.hardLinks) + copy(b[0x1c:0x20], blocks[0:4]) + binary.LittleEndian.PutUint32(b[0x20:0x24], i.flags.toInt()) + copy(b[0x24:0x28], version[0:4]) + copy(b[0x28:0x64], 
i.extents.toBytes()) + binary.LittleEndian.PutUint32(b[0x64:0x68], i.nfsFileVersion) + copy(b[0x68:0x6c], extendedAttributeBlock[0:4]) + copy(b[0x6c:0x70], fileSize[4:8]) + // b[0x70:0x74] is obsolete + copy(b[0x74:0x76], blocks[4:8]) + copy(b[0x76:0x78], extendedAttributeBlock[4:6]) + copy(b[0x78:0x7a], owner[2:4]) + copy(b[0x7a:0x7c], group[2:4]) + // b[0x7c:0x7e] is for checkeum + // b[0x7e:0x80] is unused + binary.LittleEndian.PutUint16(b[0x80:0x82], i.inodeSize-minInodeSize) + // b[0x82:0x84] is for checkeum + copy(b[0x84:0x88], changeTime[4:8]) + copy(b[0x88:0x8c], modifyTime[4:8]) + copy(b[0x8c:0x90], accessTime[4:8]) + copy(b[0x90:0x94], createTime[0:4]) + copy(b[0x94:0x98], createTime[4:8]) + + actualChecksum := inodeChecksum(b, sb.uuid.Bytes(), i.number) + checksum := make([]byte, 4) + binary.LittleEndian.PutUint32(checksum, actualChecksum) + copy(b[0x7c:0x7e], checksum[0:2]) + copy(b[0x82:0x84], checksum[2:4]) + + return b +} + +func parseOwnerPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsOwnerExecute == filePermissionsOwnerExecute, + write: mode&filePermissionsOwnerWrite == filePermissionsOwnerWrite, + read: mode&filePermissionsOwnerRead == filePermissionsOwnerRead, + } +} +func parseGroupPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsGroupExecute == filePermissionsGroupExecute, + write: mode&filePermissionsGroupWrite == filePermissionsGroupWrite, + read: mode&filePermissionsGroupRead == filePermissionsGroupRead, + } +} +func parseOtherPermissions(mode uint16) filePermissions { + return filePermissions{ + execute: mode&filePermissionsOtherExecute == filePermissionsOtherExecute, + write: mode&filePermissionsOtherWrite == filePermissionsOtherWrite, + read: mode&filePermissionsOtherRead == filePermissionsOtherRead, + } +} + +//nolint:unused // will be used in the future, not yet +func (fp *filePermissions) toOwnerInt() uint16 { + var mode uint16 + if 
fp.execute { + mode |= filePermissionsOwnerExecute + } + if fp.write { + mode |= filePermissionsOwnerWrite + } + if fp.read { + mode |= filePermissionsOwnerRead + } + return mode +} + +//nolint:unused // will be used in the future, not yet +func (fp *filePermissions) toOtherInt() uint16 { + var mode uint16 + if fp.execute { + mode |= filePermissionsOtherExecute + } + if fp.write { + mode |= filePermissionsOtherWrite + } + if fp.read { + mode |= filePermissionsOtherRead + } + return mode +} + +//nolint:unused // will be used in the future, not yet +func (fp *filePermissions) toGroupInt() uint16 { + var mode uint16 + if fp.execute { + mode |= filePermissionsGroupExecute + } + if fp.write { + mode |= filePermissionsGroupWrite + } + if fp.read { + mode |= filePermissionsGroupRead + } + return mode +} + +func parseFileType(mode uint16) fileType { + var f fileType + switch { + case fileTypeFifo.included(mode): + f = fileTypeFifo + case fileTypeBlockDevice.included(mode): + f = fileTypeBlockDevice + case fileTypeCharacterDevice.included(mode): + f = fileTypeCharacterDevice + case fileTypeDirectory.included(mode): + f = fileTypeDirectory + case fileTypeRegularFile.included(mode): + f = fileTypeRegularFile + case fileTypeSocket.included(mode): + f = fileTypeSocket + case fileTypeSymbolicLink.included(mode): + f = fileTypeSymbolicLink + } + return f +} + +func parseInodeFlags(flags uint32) inodeFlags { + return inodeFlags{ + secureDeletion: inodeFlagSecureDeletion.included(flags), + preserveForUndeletion: inodeFlagPreserveForUndeletion.included(flags), + compressed: inodeFlagCompressed.included(flags), + synchronous: inodeFlagSynchronous.included(flags), + immutable: inodeFlagImmutable.included(flags), + appendOnly: inodeFlagAppendOnly.included(flags), + noDump: inodeFlagNoDump.included(flags), + noAccessTimeUpdate: inodeFlagNoAccessTimeUpdate.included(flags), + dirtyCompressed: inodeFlagDirtyCompressed.included(flags), + compressedClusters: 
inodeFlagCompressedClusters.included(flags), + noCompress: inodeFlagNoCompress.included(flags), + encryptedInode: inodeFlagEncryptedInode.included(flags), + hashedDirectoryIndexes: inodeFlagHashedDirectoryIndexes.included(flags), + AFSMagicDirectory: inodeFlagAFSMagicDirectory.included(flags), + alwaysJournal: inodeFlagAlwaysJournal.included(flags), + noMergeTail: inodeFlagNoMergeTail.included(flags), + syncDirectoryData: inodeFlagSyncDirectoryData.included(flags), + topDirectory: inodeFlagTopDirectory.included(flags), + hugeFile: inodeFlagHugeFile.included(flags), + usesExtents: inodeFlagUsesExtents.included(flags), + extendedAttributes: inodeFlagExtendedAttributes.included(flags), + blocksPastEOF: inodeFlagBlocksPastEOF.included(flags), + snapshot: inodeFlagSnapshot.included(flags), + deletingSnapshot: inodeFlagDeletingSnapshot.included(flags), + completedSnapshotShrink: inodeFlagCompletedSnapshotShrink.included(flags), + inlineData: inodeFlagInlineData.included(flags), + inheritProject: inodeFlagInheritProject.included(flags), + } +} + +//nolint:unused // will be used in the future, not yet +func (i *inodeFlags) toInt() uint32 { + var flags uint32 + + if i.secureDeletion { + flags |= uint32(inodeFlagSecureDeletion) + } + if i.preserveForUndeletion { + flags |= uint32(inodeFlagPreserveForUndeletion) + } + if i.compressed { + flags |= uint32(inodeFlagCompressed) + } + if i.synchronous { + flags |= uint32(inodeFlagSynchronous) + } + if i.immutable { + flags |= uint32(inodeFlagImmutable) + } + if i.appendOnly { + flags |= uint32(inodeFlagAppendOnly) + } + if i.noDump { + flags |= uint32(inodeFlagNoDump) + } + if i.noAccessTimeUpdate { + flags |= uint32(inodeFlagNoAccessTimeUpdate) + } + if i.dirtyCompressed { + flags |= uint32(inodeFlagDirtyCompressed) + } + if i.compressedClusters { + flags |= uint32(inodeFlagCompressedClusters) + } + if i.noCompress { + flags |= uint32(inodeFlagNoCompress) + } + if i.encryptedInode { + flags |= uint32(inodeFlagEncryptedInode) + } 
+ if i.hashedDirectoryIndexes { + flags |= uint32(inodeFlagHashedDirectoryIndexes) + } + if i.AFSMagicDirectory { + flags |= uint32(inodeFlagAFSMagicDirectory) + } + if i.alwaysJournal { + flags |= uint32(inodeFlagAlwaysJournal) + } + if i.noMergeTail { + flags |= uint32(inodeFlagNoMergeTail) + } + if i.syncDirectoryData { + flags |= uint32(inodeFlagSyncDirectoryData) + } + if i.topDirectory { + flags |= uint32(inodeFlagTopDirectory) + } + if i.hugeFile { + flags |= uint32(inodeFlagHugeFile) + } + if i.usesExtents { + flags |= uint32(inodeFlagUsesExtents) + } + if i.extendedAttributes { + flags |= uint32(inodeFlagExtendedAttributes) + } + if i.blocksPastEOF { + flags |= uint32(inodeFlagBlocksPastEOF) + } + if i.snapshot { + flags |= uint32(inodeFlagSnapshot) + } + if i.deletingSnapshot { + flags |= uint32(inodeFlagDeletingSnapshot) + } + if i.completedSnapshotShrink { + flags |= uint32(inodeFlagCompletedSnapshotShrink) + } + if i.inlineData { + flags |= uint32(inodeFlagInlineData) + } + if i.inheritProject { + flags |= uint32(inodeFlagInheritProject) + } + + return flags +} + +// inodeChecksum calculate the checksum for an inode +// NOTE: we are assuming that the inode number is uint64, but we do not know that to be true +// +// it might be uint32 or uint64, and it might be in BigEndian as opposed to LittleEndian +// just have to start with this and see +func inodeChecksum(b, superblockUUID []byte, inodeNumber uint32) uint32 { + numberBytes := make([]byte, 4) + binary.LittleEndian.PutUint32(numberBytes, inodeNumber) + input := superblockUUID + input = append(input, numberBytes...) + input = append(input, b...) 
+ crc32Table := crc32.MakeTable(crc32.Castagnoli) + checksum := crc32.Checksum(input, crc32Table) + return checksum +} diff --git a/filesystem/ext4/journaldevice_other.go b/filesystem/ext4/journaldevice_other.go new file mode 100644 index 00000000..09a61488 --- /dev/null +++ b/filesystem/ext4/journaldevice_other.go @@ -0,0 +1,12 @@ +//go:build !linux && !unix && !darwin && !windows + +package ext4 + +import ( + "fmt" + "runtime" +) + +func journalDevice(devicePath string) (deviceNumber uint32, err error) { + return 0, fmt.Errorf("external journal device unsupported on filesystem %s", runtime.GOOS) +} diff --git a/filesystem/ext4/journaldevice_shared.go b/filesystem/ext4/journaldevice_shared.go new file mode 100644 index 00000000..00a91da9 --- /dev/null +++ b/filesystem/ext4/journaldevice_shared.go @@ -0,0 +1,40 @@ +//go:build linux || unix || freebsd || netbsd || openbsd || darwin + +package ext4 + +import ( + "fmt" + "math" + + "golang.org/x/sys/unix" +) + +func journalDevice(devicePath string) (deviceNumber uint32, err error) { + // Use unix.Stat to get file status + var stat unix.Stat_t + err = unix.Stat(devicePath, &stat) + if err != nil { + return deviceNumber, err + } + + // Extract major and minor device numbers + //nolint:unconvert,nolintlint // lint stumbles on this, thinks it is an unnecessary conversion, which is true + // on Linux, but not on others. So we will be explicit about this, and add a nolint flag + major := unix.Major(uint64(stat.Rdev)) + //nolint:unconvert,nolintlint // lint stumbles on this, thinks it is an unnecessary conversion, which is true + // on Linux, but not on others. 
So we will be explicit about this, and add a nolint flag + minor := unix.Minor(uint64(stat.Rdev)) + + // Combine major and minor numbers using unix.Mkdev + // interestingly, this does not 100% align with what I read about linux mkdev works, which would be: + // const minorbits = 20 + // func mkdev(major, minor uint32) uint32 { + // return (((major) << minorbits) | (minor)) + // } + // we leave this here for a future potential fix + journalDeviceNumber64 := unix.Mkdev(major, minor) + if journalDeviceNumber64 > math.MaxUint32 { + return deviceNumber, fmt.Errorf("journal device number %d is too large", journalDeviceNumber64) + } + return uint32(journalDeviceNumber64), nil +} diff --git a/filesystem/ext4/journaldevice_windows.go b/filesystem/ext4/journaldevice_windows.go new file mode 100644 index 00000000..bf36fb2e --- /dev/null +++ b/filesystem/ext4/journaldevice_windows.go @@ -0,0 +1,11 @@ +//go:build windows + +package ext4 + +import ( + "errors" +) + +func journalDevice(devicePath string) (deviceNumber uint32, err error) { + return 0, errors.New("external journal device unsupported on Windows") +} diff --git a/filesystem/ext4/md4/md4.go b/filesystem/ext4/md4/md4.go new file mode 100644 index 00000000..77df4270 --- /dev/null +++ b/filesystem/ext4/md4/md4.go @@ -0,0 +1,73 @@ +package md4 + +// rotateLeft rotates a 32-bit integer to the left +func rotateLeft(x uint32, s uint) uint32 { + return (x << s) | (x >> (32 - s)) +} + +// basic MD4 functions +func f(x, y, z uint32) uint32 { + return z ^ (x & (y ^ z)) +} + +func g(x, y, z uint32) uint32 { + return (x & y) + ((x ^ y) & z) +} + +func h(x, y, z uint32) uint32 { + return x ^ y ^ z +} + +// MD4 constants +const ( + k1 uint32 = 0 + k2 uint32 = 0x5A827999 + k3 uint32 = 0x6ED9EBA1 +) + +// round applies the round function as a macro +func round(f func(uint32, uint32, uint32) uint32, a, b, c, d, x uint32, s uint) uint32 { + return rotateLeft(a+f(b, c, d)+x, s) +} + +// halfMD4Transform basic cut-down MD4 transform. 
Returns only 32 bits of result. +func HalfMD4Transform(buf [4]uint32, in []uint32) uint32 { + var a, b, c, d = buf[0], buf[1], buf[2], buf[3] + + /* Round 1 */ + a = round(f, a, b, c, d, in[0]+k1, 3) + d = round(f, d, a, b, c, in[1]+k1, 7) + c = round(f, c, d, a, b, in[2]+k1, 11) + b = round(f, b, c, d, a, in[3]+k1, 19) + a = round(f, a, b, c, d, in[4]+k1, 3) + d = round(f, d, a, b, c, in[5]+k1, 7) + c = round(f, c, d, a, b, in[6]+k1, 11) + b = round(f, b, c, d, a, in[7]+k1, 19) + + /* Round 2 */ + a = round(g, a, b, c, d, in[1]+k2, 3) + d = round(g, d, a, b, c, in[3]+k2, 5) + c = round(g, c, d, a, b, in[5]+k2, 9) + b = round(g, b, c, d, a, in[7]+k2, 13) + a = round(g, a, b, c, d, in[0]+k2, 3) + d = round(g, d, a, b, c, in[2]+k2, 5) + c = round(g, c, d, a, b, in[4]+k2, 9) + b = round(g, b, c, d, a, in[6]+k2, 13) + + /* Round 3 */ + a = round(h, a, b, c, d, in[3]+k3, 3) + d = round(h, d, a, b, c, in[7]+k3, 9) + c = round(h, c, d, a, b, in[2]+k3, 11) + b = round(h, b, c, d, a, in[6]+k3, 15) + a = round(h, a, b, c, d, in[1]+k3, 3) + d = round(h, d, a, b, c, in[5]+k3, 9) + c = round(h, c, d, a, b, in[0]+k3, 11) + b = round(h, b, c, d, a, in[4]+k3, 15) + + buf[0] += a + buf[1] += b + buf[2] += c + buf[3] += d + + return buf[1] +} diff --git a/filesystem/ext4/md4/md4_test.go b/filesystem/ext4/md4/md4_test.go new file mode 100644 index 00000000..23c2a1e4 --- /dev/null +++ b/filesystem/ext4/md4/md4_test.go @@ -0,0 +1,151 @@ +package md4 + +import ( + "testing" +) + +// Test rotateLeft function +func TestRotateLeft(t *testing.T) { + tests := []struct { + x uint32 + s uint + expect uint32 + }{ + {x: 0x12345678, s: 0, expect: 0x12345678}, + {x: 0x12345678, s: 4, expect: 0x23456781}, + {x: 0x12345678, s: 16, expect: 0x56781234}, + {x: 0x12345678, s: 32, expect: 0x12345678}, + } + + for _, tt := range tests { + result := rotateLeft(tt.x, tt.s) + if result != tt.expect { + t.Errorf("rotateLeft(%#x, %d) = %#x; want %#x", tt.x, tt.s, result, tt.expect) + } + } +} + +// Test f 
function +func TestF(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xAAAAAAAA}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x55555555}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x1ffddff1}, + } + + for _, tt := range tests { + result := f(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("f(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test g function +func TestG(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xffffffff}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x0}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x1abcdef8}, + } + + for _, tt := range tests { + result := g(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("g(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test h function +func TestH(t *testing.T) { + tests := []struct { + x, y, z uint32 + expect uint32 + }{ + {x: 0xFFFFFFFF, y: 0xAAAAAAAA, z: 0x55555555, expect: 0x0}, + {x: 0x0, y: 0xAAAAAAAA, z: 0x55555555, expect: 0xFFFFFFFF}, + {x: 0x12345678, y: 0x9ABCDEF0, z: 0x0FEDCBA9, expect: 0x87654321}, + } + + for _, tt := range tests { + result := h(tt.x, tt.y, tt.z) + if result != tt.expect { + t.Errorf("h(%#x, %#x, %#x) = %#x; want %#x", tt.x, tt.y, tt.z, result, tt.expect) + } + } +} + +// Test round function +func TestRound(t *testing.T) { + tests := []struct { + name string + f func(x, y, z uint32) uint32 + a, b, c, d uint32 + x uint32 + s uint + expect uint32 + }{ + {"f", f, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 3, 0x91a2b3b8}, + {"g", g, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 5, 0x468acee2}, + {"h", h, 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0x12345678, 7, 0x5f4e3d70}, + } + + for _, tt := range tests { + a, b, c, d := tt.a, tt.b, tt.c, tt.d + 
result := round(tt.f, a, b, c, d, tt.x, tt.s) + if result != tt.expect { + t.Errorf("round(%s, %d) = %#x; want %#x", tt.name, tt.s, result, tt.expect) + } + } +} + +func TestHalfMD4Transform(t *testing.T) { + var buf = [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476} + tests := []struct { + name string + in [8]uint32 + expect uint32 + }{ + { + name: "Test Case 1", + in: [8]uint32{0, 1, 2, 3, 4, 5, 6, 7}, + expect: 0xF254F422, + }, + { + name: "Test Case 2", + in: [8]uint32{0x12345678, 0x9ABCDEF0, 0x0FEDCBA9, 0x87654321, 0x11223344, 0xAABBCCDD, 0x55667788, 0x99AABBCC}, + expect: 0xA4405E22, + }, + { + name: "Test Case 3", + in: [8]uint32{0x00000000, 0xFFFFFFFF, 0xAAAAAAAA, 0x55555555, 0x33333333, 0x66666666, 0x99999999, 0xCCCCCCCC}, + expect: 0x35B92DEF, + }, + { + name: "Test Case 4 (Empty Input)", + in: [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}, + expect: 0x5B0AA4BE, + }, + { + name: "Test Case 5 (Random Input)", + in: [8]uint32{0x89ABCDEF, 0x01234567, 0xFEDCBA98, 0x76543210, 0xA1B2C3D4, 0x0BADC0DE, 0xDEADBEEF, 0xCAFEBABE}, + expect: 0x2748FDB6, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HalfMD4Transform(buf, tt.in[:]) + if result != tt.expect { + t.Errorf("halfMD4Transform(%#v, %#v) = %#x; want %#x", buf, tt.in, result, tt.expect) + } + }) + } +} diff --git a/filesystem/ext4/miscflags.go b/filesystem/ext4/miscflags.go new file mode 100644 index 00000000..d2a22368 --- /dev/null +++ b/filesystem/ext4/miscflags.go @@ -0,0 +1,34 @@ +package ext4 + +// miscFlags is a structure holding various miscellaneous flags +type miscFlags struct { + signedDirectoryHash bool + unsignedDirectoryHash bool + developmentTest bool +} + +func parseMiscFlags(flags uint32) miscFlags { + m := miscFlags{ + signedDirectoryHash: flagSignedDirectoryHash.included(flags), + unsignedDirectoryHash: flagUnsignedDirectoryHash.included(flags), + developmentTest: flagTestDevCode.included(flags), + } + return m +} + +func (m *miscFlags) toInt() uint32 { 
+ var flags uint32 + + if m.signedDirectoryHash { + flags |= uint32(flagSignedDirectoryHash) + } + if m.unsignedDirectoryHash { + flags |= uint32(flagUnsignedDirectoryHash) + } + if m.developmentTest { + flags |= uint32(flagTestDevCode) + } + return flags +} + +var defaultMiscFlags = miscFlags{} diff --git a/filesystem/ext4/mountoptions.go b/filesystem/ext4/mountoptions.go new file mode 100644 index 00000000..a93a21a1 --- /dev/null +++ b/filesystem/ext4/mountoptions.go @@ -0,0 +1,182 @@ +package ext4 + +const ( + // default mount options + mountPrintDebugInfo mountOption = 0x1 + mountNewFilesGIDContainingDirectory mountOption = 0x2 + mountUserspaceExtendedAttributes mountOption = 0x4 + mountPosixACLs mountOption = 0x8 + mount16BitUIDs mountOption = 0x10 + mountJournalDataAndMetadata mountOption = 0x20 + mountFlushBeforeJournal mountOption = 0x40 + mountUnorderingDataMetadata mountOption = 0x60 + mountDisableWriteFlushes mountOption = 0x100 + mountTrackMetadataBlocks mountOption = 0x200 + mountDiscardDeviceSupport mountOption = 0x400 + mountDisableDelayedAllocation mountOption = 0x800 +) + +// mountOptions is a structure holding which default mount options are set +type mountOptions struct { + printDebugInfo bool + newFilesGIDContainingDirectory bool + userspaceExtendedAttributes bool + posixACLs bool + use16BitUIDs bool + journalDataAndMetadata bool + flushBeforeJournal bool + unorderingDataMetadata bool + disableWriteFlushes bool + trackMetadataBlocks bool + discardDeviceSupport bool + disableDelayedAllocation bool +} + +type mountOption uint32 + +func (m mountOption) included(a uint32) bool { + return a&uint32(m) == uint32(m) +} + +type MountOpt func(*mountOptions) + +func WithDefaultMountOptionPrintDebuggingInfo(enable bool) MountOpt { + return func(o *mountOptions) { + o.printDebugInfo = enable + } +} + +func WithDefaultMountOptionGIDFromDirectory(enable bool) MountOpt { + return func(o *mountOptions) { + o.newFilesGIDContainingDirectory = enable + } +} + +func 
WithDefaultMountOptionUserspaceXattrs(enable bool) MountOpt { + return func(o *mountOptions) { + o.userspaceExtendedAttributes = enable + } +} + +func WithDefaultMountOptionPOSIXACLs(enable bool) MountOpt { + return func(o *mountOptions) { + o.posixACLs = enable + } +} + +func WithDefaultMountOptionUID16Bit(enable bool) MountOpt { + return func(o *mountOptions) { + o.use16BitUIDs = enable + } +} + +func WithDefaultMountOptionJournalModeData(enable bool) MountOpt { + return func(o *mountOptions) { + o.journalDataAndMetadata = enable + } +} + +func WithDefaultMountOptionJournalModeOrdered(enable bool) MountOpt { + return func(o *mountOptions) { + o.flushBeforeJournal = enable + } +} + +func WithDefaultMountOptionJournalModeWriteback(enable bool) MountOpt { + return func(o *mountOptions) { + o.unorderingDataMetadata = enable + } +} + +func WithDefaultMountOptionDisableWriteFlushes(enable bool) MountOpt { + return func(o *mountOptions) { + o.disableWriteFlushes = enable + } +} + +func WithDefaultMountOptionBlockValidity(enable bool) MountOpt { + return func(o *mountOptions) { + o.trackMetadataBlocks = enable + } +} + +func WithDefaultMountOptionDiscardSupport(enable bool) MountOpt { + return func(o *mountOptions) { + o.discardDeviceSupport = enable + } +} + +func WithDefaultMountOptionDisableDelayedAllocation(enable bool) MountOpt { + return func(o *mountOptions) { + o.disableDelayedAllocation = enable + } +} + +func defaultMountOptionsFromOpts(opts []MountOpt) *mountOptions { + o := &mountOptions{} + for _, opt := range opts { + opt(o) + } + return o +} + +func parseMountOptions(flags uint32) mountOptions { + m := mountOptions{ + printDebugInfo: mountPrintDebugInfo.included(flags), + newFilesGIDContainingDirectory: mountNewFilesGIDContainingDirectory.included(flags), + userspaceExtendedAttributes: mountUserspaceExtendedAttributes.included(flags), + posixACLs: mountPosixACLs.included(flags), + use16BitUIDs: mount16BitUIDs.included(flags), + journalDataAndMetadata: 
mountJournalDataAndMetadata.included(flags), + flushBeforeJournal: mountFlushBeforeJournal.included(flags), + unorderingDataMetadata: mountUnorderingDataMetadata.included(flags), + disableWriteFlushes: mountDisableWriteFlushes.included(flags), + trackMetadataBlocks: mountTrackMetadataBlocks.included(flags), + discardDeviceSupport: mountDiscardDeviceSupport.included(flags), + disableDelayedAllocation: mountDisableDelayedAllocation.included(flags), + } + return m +} + +func (m *mountOptions) toInt() uint32 { + var flags uint32 + + if m.printDebugInfo { + flags |= uint32(mountPrintDebugInfo) + } + if m.newFilesGIDContainingDirectory { + flags |= uint32(mountNewFilesGIDContainingDirectory) + } + if m.userspaceExtendedAttributes { + flags |= uint32(mountUserspaceExtendedAttributes) + } + if m.posixACLs { + flags |= uint32(mountPosixACLs) + } + if m.use16BitUIDs { + flags |= uint32(mount16BitUIDs) + } + if m.journalDataAndMetadata { + flags |= uint32(mountJournalDataAndMetadata) + } + if m.flushBeforeJournal { + flags |= uint32(mountFlushBeforeJournal) + } + if m.unorderingDataMetadata { + flags |= uint32(mountUnorderingDataMetadata) + } + if m.disableWriteFlushes { + flags |= uint32(mountDisableWriteFlushes) + } + if m.trackMetadataBlocks { + flags |= uint32(mountTrackMetadataBlocks) + } + if m.discardDeviceSupport { + flags |= uint32(mountDiscardDeviceSupport) + } + if m.disableDelayedAllocation { + flags |= uint32(mountDisableDelayedAllocation) + } + + return flags +} diff --git a/filesystem/ext4/superblock.go b/filesystem/ext4/superblock.go new file mode 100644 index 00000000..da1d4f74 --- /dev/null +++ b/filesystem/ext4/superblock.go @@ -0,0 +1,652 @@ +package ext4 + +import ( + "encoding/binary" + "fmt" + "hash/crc32" + "math" + "reflect" + "sort" + "time" + + "github.com/diskfs/go-diskfs/util" + uuid "github.com/satori/go.uuid" +) + +type filesystemState uint16 +type errorBehaviour uint16 +type osFlag uint32 +type feature uint32 +type hashAlgorithm byte +type flag 
uint32 +type encryptionAlgorithm byte + +func (f feature) included(a uint32) bool { + return a&uint32(f) == uint32(f) +} + +//nolint:unused // we know this is unused, but it will be needed in future +func (f flag) equal(a flag) bool { + return f == a +} +func (f flag) included(a uint32) bool { + return a&uint32(f) == uint32(f) +} + +const ( + // superblockSignature is the signature for every superblock + superblockSignature uint16 = 0xef53 + // optional states for the filesystem + fsStateCleanlyUnmounted filesystemState = 0x0001 + fsStateErrors filesystemState = 0x0002 + fsStateOrphansRecovered filesystemState = 0x0004 + // how to handle erorrs + errorsContinue errorBehaviour = 1 + errorsRemountReadOnly errorBehaviour = 2 + errorsPanic errorBehaviour = 3 + // checksum type + checkSumTypeCRC32c byte = 1 + // oses + osLinux osFlag = 0 + osHurd osFlag = 1 + osMasix osFlag = 2 + osFreeBSD osFlag = 3 + osLites osFlag = 4 + // compatible, incompatible, and compatibleReadOnly feature flags + compatFeatureDirectoryPreAllocate feature = 0x1 + compatFeatureImagicInodes feature = 0x2 + compatFeatureHasJournal feature = 0x4 + compatFeatureExtendedAttributes feature = 0x8 + compatFeatureReservedGDTBlocksForExpansion feature = 0x10 + compatFeatureDirectoryIndices feature = 0x20 + compatFeatureLazyBlockGroup feature = 0x40 + compatFeatureExcludeInode feature = 0x80 + compatFeatureExcludeBitmap feature = 0x100 + compatFeatureSparseSuperBlockV2 feature = 0x200 + incompatFeatureCompression feature = 0x1 + incompatFeatureDirectoryEntriesRecordFileType feature = 0x2 + incompatFeatureRecoveryNeeded feature = 0x4 + incompatFeatureSeparateJournalDevice feature = 0x8 + incompatFeatureMetaBlockGroups feature = 0x10 + incompatFeatureExtents feature = 0x40 + incompatFeature64Bit feature = 0x80 + incompatFeatureMultipleMountProtection feature = 0x100 + incompatFeatureFlexBlockGroups feature = 0x200 + incompatFeatureExtendedAttributeInodes feature = 0x400 + 
incompatFeatureDataInDirectoryEntries feature = 0x1000 + incompatFeatureMetadataChecksumSeedInSuperblock feature = 0x2000 + incompatFeatureLargeDirectory feature = 0x4000 + incompatFeatureDataInInode feature = 0x8000 + incompatFeatureEncryptInodes feature = 0x10000 + roCompatFeatureSparseSuperblock feature = 0x1 + roCompatFeatureLargeFile feature = 0x2 + roCompatFeatureBtreeDirectory feature = 0x4 + roCompatFeatureHugeFile feature = 0x8 + roCompatFeatureGDTChecksum feature = 0x10 + roCompatFeatureLargeSubdirectoryCount feature = 0x20 + roCompatFeatureLargeInodes feature = 0x40 + roCompatFeatureSnapshot feature = 0x80 + roCompatFeatureQuota feature = 0x100 + roCompatFeatureBigalloc feature = 0x200 + roCompatFeatureMetadataChecksums feature = 0x400 + roCompatFeatureReplicas feature = 0x800 + roCompatFeatureReadOnly feature = 0x1000 + roCompatFeatureProjectQuotas feature = 0x2000 + // hash algorithms for htree directory entries + hashLegacy hashAlgorithm = 0x0 + hashHalfMD4 hashAlgorithm = 0x1 + hashTea hashAlgorithm = 0x2 + hashLegacyUnsigned hashAlgorithm = 0x3 + hashHalfMD4Unsigned hashAlgorithm = 0x4 + hashTeaUnsigned hashAlgorithm = 0x5 + // miscellaneous flags + flagSignedDirectoryHash flag = 0x0001 + flagUnsignedDirectoryHash flag = 0x0002 + flagTestDevCode flag = 0x0004 + // encryption algorithms + //nolint:unused // we know these are unused, but they will be needed in the future + encryptionAlgorithmInvalid encryptionAlgorithm = 0 + encryptionAlgorithm256AESXTS encryptionAlgorithm = 1 + encryptionAlgorithm256AESGCM encryptionAlgorithm = 2 + encryptionAlgorithm256AESCBC encryptionAlgorithm = 3 +) + +// journalBackup is a backup in the superblock of the journal's inode i_block[] array and size +type journalBackup struct { + iBlocks []uint32 + iSize uint64 +} + +// Superblock is a structure holding the ext4 superblock +type superblock struct { + inodeCount uint32 + blockCount uint64 + reservedBlocks uint64 + freeBlocks uint64 + freeInodes uint32 + firstDataBlock 
uint32 + blockSize uint32 + clusterSize uint64 + blocksPerGroup uint32 + clustersPerGroup uint32 + inodesPerGroup uint32 + mountTime time.Time + writeTime time.Time + mountCount uint16 + mountsToFsck uint16 + filesystemState filesystemState + errorBehaviour errorBehaviour + minorRevision uint16 + lastCheck time.Time + checkInterval uint32 + creatorOS osFlag + revisionLevel uint32 + reservedBlocksDefaultUID uint16 + reservedBlocksDefaultGID uint16 + firstNonReservedInode uint32 + inodeSize uint16 + blockGroup uint16 + features featureFlags + uuid *uuid.UUID + volumeLabel string + lastMountedDirectory string + algorithmUsageBitmap uint32 + preallocationBlocks byte + preallocationDirectoryBlocks byte + reservedGDTBlocks uint16 + journalSuperblockUUID *uuid.UUID + journalInode uint32 + journalDeviceNumber uint32 + orphanedInodesStart uint32 + hashTreeSeed []uint32 + hashVersion hashAlgorithm + groupDescriptorSize uint16 + defaultMountOptions mountOptions + firstMetablockGroup uint32 + mkfsTime time.Time + journalBackup *journalBackup + // 64-bit mode features + inodeMinBytes uint16 + inodeReserveBytes uint16 + miscFlags miscFlags + raidStride uint16 + multiMountPreventionInterval uint16 + multiMountProtectionBlock uint64 + raidStripeWidth uint32 + logGroupsPerFlex uint64 + checksumType byte + totalKBWritten uint64 + snapshotInodeNumber uint32 + snapshotID uint32 + snapshotReservedBlocks uint64 + snapshotStartInode uint32 + errorCount uint32 + errorFirstTime time.Time + errorFirstInode uint32 + errorFirstBlock uint64 + errorFirstFunction string + errorFirstLine uint32 + errorLastTime time.Time + errorLastInode uint32 + errorLastLine uint32 + errorLastBlock uint64 + errorLastFunction string + mountOptions string + userQuotaInode uint32 + groupQuotaInode uint32 + overheadBlocks uint32 + backupSuperblockBlockGroups [2]uint32 + encryptionAlgorithms []encryptionAlgorithm + encryptionSalt []byte + lostFoundInode uint32 + projectQuotaInode uint32 + checksumSeed uint32 +} + 
+func (sb *superblock) equal(o *superblock) bool { + if (sb == nil && o != nil) || (o == nil && sb != nil) { + return false + } + if sb == nil && o == nil { + return true + } + return reflect.DeepEqual(sb, o) +} + +// FSInformationSectorFromBytes create an FSInformationSector struct from bytes +func superblockFromBytes(b []byte) (*superblock, error) { + bLen := len(b) + if bLen != int(SuperblockSize) { + return nil, fmt.Errorf("cannot read superblock from %d bytes instead of expected %d", bLen, SuperblockSize) + } + + // check the magic signature + actualSignature := binary.LittleEndian.Uint16(b[0x38:0x3a]) + if actualSignature != superblockSignature { + return nil, fmt.Errorf("erroneous signature at location 0x38 was %x instead of expected %x", actualSignature, superblockSignature) + } + + sb := superblock{} + + // first read feature flags of various types + compatFlags := binary.LittleEndian.Uint32(b[0x5c:0x60]) + incompatFlags := binary.LittleEndian.Uint32(b[0x60:0x64]) + roCompatFlags := binary.LittleEndian.Uint32(b[0x64:0x68]) + // track which ones are set + sb.features = parseFeatureFlags(compatFlags, incompatFlags, roCompatFlags) + + sb.inodeCount = binary.LittleEndian.Uint32(b[0:4]) + + // block count, reserved block count and free blocks depends on whether the fs is 64-bit or not + blockCount := make([]byte, 8) + reservedBlocks := make([]byte, 8) + freeBlocks := make([]byte, 8) + + copy(blockCount[0:4], b[0x4:0x8]) + copy(reservedBlocks[0:4], b[0x8:0xc]) + copy(freeBlocks[0:4], b[0xc:0x10]) + + if sb.features.fs64Bit { + copy(blockCount[4:8], b[0x150:0x154]) + copy(reservedBlocks[4:8], b[0x154:0x158]) + copy(freeBlocks[4:8], b[0x158:0x15c]) + } + sb.blockCount = binary.LittleEndian.Uint64(blockCount) + sb.reservedBlocks = binary.LittleEndian.Uint64(reservedBlocks) + sb.freeBlocks = binary.LittleEndian.Uint64(freeBlocks) + + sb.freeInodes = binary.LittleEndian.Uint32(b[0x10:0x14]) + sb.firstDataBlock = binary.LittleEndian.Uint32(b[0x14:0x18]) + sb.blockSize 
= uint32(math.Exp2(float64(10 + binary.LittleEndian.Uint32(b[0x18:0x1c])))) + sb.clusterSize = uint64(math.Exp2(float64(binary.LittleEndian.Uint32(b[0x1c:0x20])))) + sb.blocksPerGroup = binary.LittleEndian.Uint32(b[0x20:0x24]) + if sb.features.bigalloc { + sb.clustersPerGroup = binary.LittleEndian.Uint32(b[0x24:0x28]) + } + sb.inodesPerGroup = binary.LittleEndian.Uint32(b[0x28:0x2c]) + sb.mountTime = time.Unix(int64(binary.LittleEndian.Uint32(b[0x2c:0x30])), 0) + sb.writeTime = time.Unix(int64(binary.LittleEndian.Uint32(b[0x30:0x34])), 0) + sb.mountCount = binary.LittleEndian.Uint16(b[0x34:0x36]) + sb.mountsToFsck = binary.LittleEndian.Uint16(b[0x36:0x38]) + + sb.filesystemState = filesystemState(binary.LittleEndian.Uint16(b[0x3a:0x3c])) + sb.errorBehaviour = errorBehaviour(binary.LittleEndian.Uint16(b[0x3c:0x3e])) + + sb.minorRevision = binary.LittleEndian.Uint16(b[0x3e:0x40]) + sb.lastCheck = time.Unix(int64(binary.LittleEndian.Uint32(b[0x40:0x44])), 0) + sb.checkInterval = binary.LittleEndian.Uint32(b[0x44:0x48]) + + sb.creatorOS = osFlag(binary.LittleEndian.Uint32(b[0x48:0x4c])) + sb.revisionLevel = binary.LittleEndian.Uint32(b[0x4c:0x50]) + sb.reservedBlocksDefaultUID = binary.LittleEndian.Uint16(b[0x50:0x52]) + sb.reservedBlocksDefaultGID = binary.LittleEndian.Uint16(b[0x52:0x54]) + + sb.firstNonReservedInode = binary.LittleEndian.Uint32(b[0x54:0x58]) + sb.inodeSize = binary.LittleEndian.Uint16(b[0x58:0x5a]) + sb.blockGroup = binary.LittleEndian.Uint16(b[0x5a:0x5c]) + + voluuid, err := uuid.FromBytes(b[0x68:0x78]) + if err != nil { + return nil, fmt.Errorf("unable to read volume UUID: %v", err) + } + sb.uuid = &voluuid + sb.volumeLabel = string(b[0x78:0x88]) + sb.lastMountedDirectory = string(b[0x88:0xc8]) + sb.algorithmUsageBitmap = binary.LittleEndian.Uint32(b[0xc8:0xcc]) + + sb.preallocationBlocks = b[0xcc] + sb.preallocationDirectoryBlocks = b[0xcd] + sb.reservedGDTBlocks = binary.LittleEndian.Uint16(b[0xce:0xd0]) + + journaluuid, err := 
uuid.FromBytes(b[0xd0:0xe0]) + if err != nil { + return nil, fmt.Errorf("unable to read journal UUID: %v", err) + } + sb.journalSuperblockUUID = &journaluuid + sb.journalInode = binary.LittleEndian.Uint32(b[0xe0:0xe4]) + sb.journalDeviceNumber = binary.LittleEndian.Uint32(b[0xe4:0xe8]) + sb.orphanedInodesStart = binary.LittleEndian.Uint32(b[0xe8:0xec]) + + htreeSeed := make([]uint32, 0, 4) + htreeSeed = append(htreeSeed, + binary.LittleEndian.Uint32(b[0xec:0xf0]), + binary.LittleEndian.Uint32(b[0xf0:0xf4]), + binary.LittleEndian.Uint32(b[0xf4:0xf8]), + binary.LittleEndian.Uint32(b[0xf8:0xfc]), + ) + sb.hashTreeSeed = htreeSeed + + sb.hashVersion = hashAlgorithm(b[0xfc]) + + sb.groupDescriptorSize = binary.LittleEndian.Uint16(b[0xfe:0x100]) + + sb.defaultMountOptions = parseMountOptions(binary.LittleEndian.Uint32(b[0x100:0x104])) + sb.firstMetablockGroup = binary.LittleEndian.Uint32(b[0x104:0x108]) + sb.mkfsTime = time.Unix(int64(binary.LittleEndian.Uint32(b[0x108:0x10c])), 0) + + journalBackupType := b[0xfd] + if journalBackupType == 0 { + journalBackupArray := make([]uint32, 0, 15) + startJournalBackup := 0x10c + for i := 0; i < 15; i++ { + start := startJournalBackup + 4*i + end := startJournalBackup + 4*i + 4 + journalBackupArray = append(journalBackupArray, binary.LittleEndian.Uint32(b[start:end])) + } + iSizeBytes := make([]byte, 8) + + copy(iSizeBytes[0:4], b[startJournalBackup+4*16:startJournalBackup+4*17]) + copy(iSizeBytes[4:8], b[startJournalBackup+4*15:startJournalBackup+4*16]) + + sb.journalBackup = &journalBackup{ + iSize: binary.LittleEndian.Uint64(iSizeBytes), + iBlocks: journalBackupArray, + } + } + + sb.inodeMinBytes = binary.LittleEndian.Uint16(b[0x15c:0x15e]) + sb.inodeReserveBytes = binary.LittleEndian.Uint16(b[0x15e:0x160]) + sb.miscFlags = parseMiscFlags(binary.LittleEndian.Uint32(b[0x160:0x164])) + + sb.raidStride = binary.LittleEndian.Uint16(b[0x164:0x166]) + sb.raidStripeWidth = binary.LittleEndian.Uint32(b[0x170:0x174]) + + 
sb.multiMountPreventionInterval = binary.LittleEndian.Uint16(b[0x166:0x168]) + sb.multiMountProtectionBlock = binary.LittleEndian.Uint64(b[0x168:0x170]) + + sb.logGroupsPerFlex = uint64(math.Exp2(float64(b[0x174]))) + + sb.checksumType = b[0x175] // only valid one is 1 + if sb.checksumType != checkSumTypeCRC32c { + return nil, fmt.Errorf("cannot read superblock: invalid checksum type %d, only valid is %d", sb.checksumType, checkSumTypeCRC32c) + } + + // b[0x176:0x178] are reserved padding + + sb.totalKBWritten = binary.LittleEndian.Uint64(b[0x178:0x180]) + + sb.snapshotInodeNumber = binary.LittleEndian.Uint32(b[0x180:0x184]) + sb.snapshotID = binary.LittleEndian.Uint32(b[0x184:0x188]) + sb.snapshotReservedBlocks = binary.LittleEndian.Uint64(b[0x188:0x190]) + sb.snapshotStartInode = binary.LittleEndian.Uint32(b[0x190:0x194]) + + // errors + sb.errorCount = binary.LittleEndian.Uint32(b[0x194:0x198]) + sb.errorFirstTime = time.Unix(int64(binary.LittleEndian.Uint32(b[0x198:0x19c])), 0) + sb.errorFirstInode = binary.LittleEndian.Uint32(b[0x19c:0x1a0]) + sb.errorFirstBlock = binary.LittleEndian.Uint64(b[0x1a0:0x1a8]) + sb.errorFirstFunction = string(b[0x1a8:0x1c8]) + sb.errorFirstLine = binary.LittleEndian.Uint32(b[0x1c8:0x1cc]) + sb.errorLastTime = time.Unix(int64(binary.LittleEndian.Uint32(b[0x1cc:0x1d0])), 0) + sb.errorLastInode = binary.LittleEndian.Uint32(b[0x1d0:0x1d4]) + sb.errorLastLine = binary.LittleEndian.Uint32(b[0x1d4:0x1d8]) + sb.errorLastBlock = binary.LittleEndian.Uint64(b[0x1d8:0x1e0]) + sb.errorLastFunction = string(b[0x1e0:0x200]) + + sb.mountOptions = string(b[0x200:0x240]) + sb.userQuotaInode = binary.LittleEndian.Uint32(b[0x240:0x244]) + sb.groupQuotaInode = binary.LittleEndian.Uint32(b[0x244:0x248]) + // overheadBlocks *always* is 0 + sb.overheadBlocks = binary.LittleEndian.Uint32(b[0x248:0x24c]) + sb.backupSuperblockBlockGroups = [2]uint32{ + binary.LittleEndian.Uint32(b[0x24c:0x250]), + binary.LittleEndian.Uint32(b[0x250:0x254]), + } + for i := 
0; i < 4; i++ {
+		sb.encryptionAlgorithms = append(sb.encryptionAlgorithms, encryptionAlgorithm(b[0x254+i]))
+	}
+	sb.encryptionSalt = b[0x258:0x268]
+	sb.lostFoundInode = binary.LittleEndian.Uint32(b[0x268:0x26c])
+	sb.projectQuotaInode = binary.LittleEndian.Uint32(b[0x26c:0x270])
+
+	sb.checksumSeed = binary.LittleEndian.Uint32(b[0x270:0x274])
+
+	// b[0x274:0x3fc] are reserved for zero padding
+
+	// checksum is stored in the last 4 bytes of the superblock
+	checksum := binary.LittleEndian.Uint32(b[0x3fc:0x400])
+
+	// calculate the checksum and validate - we use crc32c.
+	// The ext4 superblock checksum covers everything up to, but not
+	// including, the checksum field itself at 0x3fc; including any bytes of
+	// the stored checksum in the calculation would make the value
+	// self-referential and impossible to reproduce.
+	if sb.features.metadataChecksums {
+		crc32Table := crc32.MakeTable(crc32.Castagnoli)
+		actualChecksum := crc32.Checksum(b[0:0x3fc], crc32Table)
+		if actualChecksum != checksum {
+			return nil, fmt.Errorf("invalid superblock checksum, actual was %x, on disk was %x", actualChecksum, checksum)
+		}
+	}
+
+	return &sb, nil
+}
+
+// toBytes returns a superblock ready to be written to disk
+func (sb *superblock) toBytes() ([]byte, error) {
+	b := make([]byte, SuperblockSize)
+
+	binary.LittleEndian.PutUint16(b[0x38:0x3a], superblockSignature)
+	compatFlags, incompatFlags, roCompatFlags := sb.features.toInts()
+	binary.LittleEndian.PutUint32(b[0x5c:0x60], compatFlags)
+	binary.LittleEndian.PutUint32(b[0x60:0x64], incompatFlags)
+	binary.LittleEndian.PutUint32(b[0x64:0x68], roCompatFlags)
+
+	binary.LittleEndian.PutUint32(b[0:4], sb.inodeCount)
+
+	// block count, reserved block count and free blocks depends on whether the fs is 64-bit or not
+	blockCount := make([]byte, 8)
+	reservedBlocks := make([]byte, 8)
+	freeBlocks := make([]byte, 8)
+
+	binary.LittleEndian.PutUint64(blockCount, sb.blockCount)
+	binary.LittleEndian.PutUint64(reservedBlocks, sb.reservedBlocks)
+	binary.LittleEndian.PutUint64(freeBlocks, sb.freeBlocks)
+
+	copy(b[0x4:0x8], blockCount[0:4])
+	copy(b[0x8:0xc], reservedBlocks[0:4])
+	copy(b[0xc:0x10], freeBlocks[0:4])
+
+	if sb.features.fs64Bit {
+		copy(b[0x150:0x154], blockCount[4:8])
+		copy(b[0x154:0x158], reservedBlocks[4:8])
+		
copy(b[0x158:0x15c], freeBlocks[4:8]) + } + + binary.LittleEndian.PutUint32(b[0x10:0x14], sb.freeInodes) + binary.LittleEndian.PutUint32(b[0x14:0x18], sb.firstDataBlock) + binary.LittleEndian.PutUint32(b[0x18:0x1c], uint32(math.Log2(float64(sb.blockSize))-10)) + binary.LittleEndian.PutUint32(b[0x1c:0x20], uint32(math.Log2(float64(sb.clusterSize)))) + + binary.LittleEndian.PutUint32(b[0x20:0x24], sb.blocksPerGroup) + if sb.features.bigalloc { + binary.LittleEndian.PutUint32(b[0x24:0x28], sb.clustersPerGroup) + } + binary.LittleEndian.PutUint32(b[0x28:0x2c], sb.inodesPerGroup) + binary.LittleEndian.PutUint32(b[0x2c:0x30], uint32(sb.mountTime.Unix())) + binary.LittleEndian.PutUint32(b[0x30:0x34], uint32(sb.writeTime.Unix())) + binary.LittleEndian.PutUint16(b[0x34:0x36], sb.mountCount) + binary.LittleEndian.PutUint16(b[0x36:0x38], sb.mountsToFsck) + + binary.LittleEndian.PutUint16(b[0x3a:0x3c], uint16(sb.filesystemState)) + binary.LittleEndian.PutUint16(b[0x3c:0x3e], uint16(sb.errorBehaviour)) + + binary.LittleEndian.PutUint16(b[0x3e:0x40], sb.minorRevision) + binary.LittleEndian.PutUint32(b[0x40:0x44], uint32(sb.lastCheck.Unix())) + binary.LittleEndian.PutUint32(b[0x44:0x48], sb.checkInterval) + + binary.LittleEndian.PutUint32(b[0x48:0x4c], uint32(sb.creatorOS)) + binary.LittleEndian.PutUint32(b[0x4c:0x50], sb.revisionLevel) + binary.LittleEndian.PutUint16(b[0x50:0x52], sb.reservedBlocksDefaultUID) + binary.LittleEndian.PutUint16(b[0x52:0x54], sb.reservedBlocksDefaultGID) + + binary.LittleEndian.PutUint32(b[0x54:0x58], sb.firstNonReservedInode) + binary.LittleEndian.PutUint16(b[0x58:0x5a], sb.inodeSize) + binary.LittleEndian.PutUint16(b[0x5a:0x5c], sb.blockGroup) + + if sb.uuid != nil { + copy(b[0x68:0x78], sb.uuid.Bytes()) + } + + ab, err := stringToASCIIBytes(sb.volumeLabel, 16) + if err != nil { + return nil, fmt.Errorf("error converting volume label to bytes: %v", err) + } + copy(b[0x78:0x88], ab[0:16]) + ab, err = stringToASCIIBytes(sb.lastMountedDirectory, 64) + 
if err != nil {
+		return nil, fmt.Errorf("error last mounted directory to bytes: %v", err)
+	}
+	copy(b[0x88:0xc8], ab[0:64])
+
+	binary.LittleEndian.PutUint32(b[0xc8:0xcc], sb.algorithmUsageBitmap)
+
+	b[0xcc] = sb.preallocationBlocks
+	b[0xcd] = sb.preallocationDirectoryBlocks
+	binary.LittleEndian.PutUint16(b[0xce:0xd0], sb.reservedGDTBlocks)
+
+	if sb.journalSuperblockUUID != nil {
+		copy(b[0xd0:0xe0], sb.journalSuperblockUUID.Bytes())
+	}
+
+	binary.LittleEndian.PutUint32(b[0xe0:0xe4], sb.journalInode)
+	binary.LittleEndian.PutUint32(b[0xe4:0xe8], sb.journalDeviceNumber)
+	binary.LittleEndian.PutUint32(b[0xe8:0xec], sb.orphanedInodesStart)
+
+	// to be safe: guarantee four seed words before indexing below
+	if len(sb.hashTreeSeed) < 4 {
+		sb.hashTreeSeed = append(sb.hashTreeSeed, 0, 0, 0, 0)
+	}
+	binary.LittleEndian.PutUint32(b[0xec:0xf0], sb.hashTreeSeed[0])
+	binary.LittleEndian.PutUint32(b[0xf0:0xf4], sb.hashTreeSeed[1])
+	binary.LittleEndian.PutUint32(b[0xf4:0xf8], sb.hashTreeSeed[2])
+	binary.LittleEndian.PutUint32(b[0xf8:0xfc], sb.hashTreeSeed[3])
+
+	b[0xfc] = byte(sb.hashVersion)
+
+	// the group descriptor size (s_desc_size) lives at 0xfe-0x100; byte 0xfd
+	// is the journal backup type (s_jnl_backup_type). Writing the uint16 at
+	// 0xfd would clobber that byte and place the field one byte off from
+	// where superblockFromBytes reads it (0xfe), breaking round-trips.
+	binary.LittleEndian.PutUint16(b[0xfe:0x100], sb.groupDescriptorSize)
+
+	binary.LittleEndian.PutUint32(b[0x100:0x104], sb.defaultMountOptions.toInt())
+	binary.LittleEndian.PutUint32(b[0x104:0x108], sb.firstMetablockGroup)
+	binary.LittleEndian.PutUint32(b[0x108:0x10c], uint32(sb.mkfsTime.Unix()))
+
+	if sb.journalBackup != nil {
+		// backup type 0 means the journal inode's i_block[] array is backed up here
+		b[0xfd] = 0
+		startJournalBackup := 0x10c
+		for i := 0; i < 15; i++ {
+			start := startJournalBackup + 4*i
+			end := startJournalBackup + 4*i + 4
+			binary.LittleEndian.PutUint32(b[start:end], sb.journalBackup.iBlocks[i])
+		}
+
+		// i_size is split across two on-disk words: low 32 bits at slot 16, high at slot 15
+		iSizeBytes := make([]byte, 8)
+		binary.LittleEndian.PutUint64(iSizeBytes, sb.journalBackup.iSize)
+		copy(b[startJournalBackup+4*16:startJournalBackup+4*17], iSizeBytes[0:4])
+		copy(b[startJournalBackup+4*15:startJournalBackup+4*16], iSizeBytes[4:8])
+	}
+
+	binary.LittleEndian.PutUint16(b[0x15c:0x15e], sb.inodeMinBytes)
+	binary.LittleEndian.PutUint16(b[0x15e:0x160], 
sb.inodeReserveBytes) + binary.LittleEndian.PutUint32(b[0x160:0x164], sb.miscFlags.toInt()) + + binary.LittleEndian.PutUint16(b[0x164:0x166], sb.raidStride) + binary.LittleEndian.PutUint32(b[0x170:0x174], sb.raidStripeWidth) + + binary.LittleEndian.PutUint16(b[0x166:0x168], sb.multiMountPreventionInterval) + binary.LittleEndian.PutUint64(b[0x168:0x170], sb.multiMountProtectionBlock) + + b[0x174] = uint8(math.Log2(float64(sb.logGroupsPerFlex))) + + b[0x175] = sb.checksumType // only valid one is 1 + + // b[0x176:0x178] are reserved padding + + binary.LittleEndian.PutUint64(b[0x178:0x180], sb.totalKBWritten) + + binary.LittleEndian.PutUint32(b[0x180:0x184], sb.snapshotInodeNumber) + binary.LittleEndian.PutUint32(b[0x184:0x188], sb.snapshotID) + binary.LittleEndian.PutUint64(b[0x188:0x190], sb.snapshotReservedBlocks) + binary.LittleEndian.PutUint32(b[0x190:0x194], sb.snapshotStartInode) + + // errors + binary.LittleEndian.PutUint32(b[0x194:0x198], sb.errorCount) + binary.LittleEndian.PutUint32(b[0x198:0x19c], uint32(sb.errorFirstTime.Unix())) + binary.LittleEndian.PutUint32(b[0x19c:0x1a0], sb.errorFirstInode) + binary.LittleEndian.PutUint64(b[0x1a0:0x1a8], sb.errorFirstBlock) + errorFirstFunctionBytes, err := stringToASCIIBytes(sb.errorFirstFunction, 32) + if err != nil { + return nil, fmt.Errorf("error converting errorFirstFunction to bytes: %v", err) + } + copy(b[0x1a8:0x1c8], errorFirstFunctionBytes) + binary.LittleEndian.PutUint32(b[0x1c8:0x1cc], sb.errorFirstLine) + binary.LittleEndian.PutUint32(b[0x1cc:0x1d0], uint32(sb.errorLastTime.Unix())) + binary.LittleEndian.PutUint32(b[0x1d0:0x1d4], sb.errorLastInode) + binary.LittleEndian.PutUint32(b[0x1d4:0x1d8], sb.errorLastLine) + binary.LittleEndian.PutUint64(b[0x1d8:0x1e0], sb.errorLastBlock) + errorLastFunctionBytes, err := stringToASCIIBytes(sb.errorLastFunction, 32) + if err != nil { + return nil, fmt.Errorf("error converting errorLastFunction to bytes: %v", err) + } + copy(b[0x1e0:0x200], errorLastFunctionBytes) 
+
+	mountOptionsBytes, err := stringToASCIIBytes(sb.mountOptions, 64)
+	if err != nil {
+		return nil, fmt.Errorf("error converting mountOptions to bytes: %v", err)
+	}
+	copy(b[0x200:0x240], mountOptionsBytes)
+	binary.LittleEndian.PutUint32(b[0x240:0x244], sb.userQuotaInode)
+	binary.LittleEndian.PutUint32(b[0x244:0x248], sb.groupQuotaInode)
+	// overheadBlocks *always* is 0
+	binary.LittleEndian.PutUint32(b[0x248:0x24c], sb.overheadBlocks)
+	binary.LittleEndian.PutUint32(b[0x24c:0x250], sb.backupSuperblockBlockGroups[0])
+	binary.LittleEndian.PutUint32(b[0x250:0x254], sb.backupSuperblockBlockGroups[1])
+	// write at most the four on-disk encryption-algorithm slots; indexing
+	// blindly to 4 would panic on a superblock with a shorter slice
+	for i := 0; i < 4 && i < len(sb.encryptionAlgorithms); i++ {
+		b[0x254+i] = byte(sb.encryptionAlgorithms[i])
+	}
+	copy(b[0x258:0x268], sb.encryptionSalt)
+	binary.LittleEndian.PutUint32(b[0x268:0x26c], sb.lostFoundInode)
+	binary.LittleEndian.PutUint32(b[0x26c:0x270], sb.projectQuotaInode)
+
+	binary.LittleEndian.PutUint32(b[0x270:0x274], sb.checksumSeed)
+
+	// b[0x274:0x3fc] are reserved for zero padding
+
+	// calculate the checksum - we use crc32c. The ext4 superblock checksum
+	// covers everything up to, but not including, the checksum field itself
+	// at 0x3fc; including bytes of the checksum field in the calculation
+	// would make the stored value impossible for a reader to reproduce.
+	if sb.features.metadataChecksums {
+		crc32Table := crc32.MakeTable(crc32.Castagnoli)
+		actualChecksum := crc32.Checksum(b[0:0x3fc], crc32Table)
+		binary.LittleEndian.PutUint32(b[0x3fc:0x400], actualChecksum)
+	}
+
+	return b, nil
+}
+
+// calculateBackupSuperblockGroups calculates which block groups should have backup superblocks.
+func calculateBackupSuperblockGroups(bgs int64) []int64 { + // calculate which block groups should have backup superblocks + // these are if the block group number is a power of 3, 5, or 7 + var backupGroups []int64 + for i := float64(0); ; i++ { + bg := int64(math.Pow(3, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + for i := float64(0); ; i++ { + bg := int64(math.Pow(5, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + for i := float64(0); ; i++ { + bg := int64(math.Pow(7, i)) + if bg >= bgs { + break + } + backupGroups = append(backupGroups, bg) + } + // sort the backup groups + uniqBackupGroups := util.Uniqify[int64](backupGroups) + sort.Slice(uniqBackupGroups, func(i, j int) bool { + return uniqBackupGroups[i] < uniqBackupGroups[j] + }) + return uniqBackupGroups +} diff --git a/filesystem/ext4/superblock_test.go b/filesystem/ext4/superblock_test.go new file mode 100644 index 00000000..86406d59 --- /dev/null +++ b/filesystem/ext4/superblock_test.go @@ -0,0 +1,64 @@ +package ext4 + +import ( + "bytes" + "os" + "reflect" + "testing" +) + +func TestSuperblockFromBytes(t *testing.T) { + b, err := os.ReadFile("testdata/superblock.bin") + if err != nil { + t.Fatalf("Failed to read superblock.bin") + } + sb, err := superblockFromBytes(b) + if err != nil { + t.Fatalf("Failed to parse superblock.bin: %v", err) + } + expected := &superblock{} + if !expected.equal(sb) { + t.Errorf("superblockFromBytes() = %v; want %v", sb, expected) + } +} + +func TestSuperblockToBytes(t *testing.T) { + sb := superblock{} + expected, err := os.ReadFile("testdata/superblock.bin") + if err != nil { + t.Fatalf("Failed to read superblock.bin") + } + b, err := sb.toBytes() + if err != nil { + t.Fatalf("Failed to serialize superblock: %v", err) + } + if !bytes.Equal(b, expected) { + t.Errorf("superblock.toBytes() = %v; want %v", b, expected) + } +} + +func TestCalculateBackupSuperblocks(t *testing.T) { + tests := []struct { + bgs 
int64 + expected []int64 + }{ + // Test case 1: Single block group + {bgs: 2, expected: []int64{1}}, + + // Test case 2: Multiple block groups + {bgs: 119, expected: []int64{1, 3, 5, 7, 9, 25, 27, 49, 81}}, + + // Test case 3: Large number of block groups + {bgs: 746, expected: []int64{1, 3, 5, 7, 9, 25, 27, 49, 81, 125, 243, 343, 625, 729}}, + } + + for _, tt := range tests { + t.Run("", func(t *testing.T) { + result := calculateBackupSuperblockGroups(tt.bgs) + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("calculateBackupSuperblockGroups(%d) = %v; want %v", + tt.bgs, result, tt.expected) + } + }) + } +} diff --git a/filesystem/ext4/testdata/.gitignore b/filesystem/ext4/testdata/.gitignore new file mode 100644 index 00000000..1110d029 --- /dev/null +++ b/filesystem/ext4/testdata/.gitignore @@ -0,0 +1 @@ +ext4.img diff --git a/filesystem/ext4/testdata/README.md b/filesystem/ext4/testdata/README.md new file mode 100644 index 00000000..6c87e2cc --- /dev/null +++ b/filesystem/ext4/testdata/README.md @@ -0,0 +1,38 @@ +# ext4 Test Fixtures + +This directory contains test fixtures for ext4 filesystems. Specifically, it contains the following files: + +* `ext4.img`: A 100MB filesystem img + +Because of the size of the image, it is excluded from git. It needs to be generated anew for each +installation on which you want to test. Of course, each generation can give slightly different +inode information, and certainly will give different timestamps, so you need to update the tests +appropriately; see below. + +To generate the `ext4.img`, run `./buildimg.sh`. + +This makes: + +* the `/foo` directory with sufficient entries to require using hash tree directories +* some small and large files in the root + +You now have the exact files in `$PWD` + +## Updating Tests + +Certain data should be copied over to your "valid" test information in test files, notably dates. 
+
+## Fixed information
+
+In addition to the `ext4.img`, there is a file `superblock.bin` with just a superblock in it. As blocks normally are 1024 bytes,
+this is small and is committed to git.
+
+You can recreate this superblock by doing:
+
+```sh
+./buildimg.sh
+dd if=ext4.img of=superblock.bin bs=1024 count=1 skip=1
+```
+
+Note, however, that the superblock itself will be different due to regeneration changes. You will need to fix tests that depend on it,
+and then commit both the changed tests and the superblock.bin to git.
diff --git a/filesystem/ext4/testdata/buildimg.sh b/filesystem/ext4/testdata/buildimg.sh
new file mode 100644
index 00000000..428ae2d6
--- /dev/null
+++ b/filesystem/ext4/testdata/buildimg.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+set -e
+cat << "EOF" | docker run -i --rm -v $PWD:/data --privileged alpine:3.20
+apk --update add e2fsprogs
+# 100MB, per the README; a 10MB image could not hold ten-meg-file.dat below
+dd if=/dev/zero of=/data/ext4.img bs=1M count=100
+mkfs.ext4 /data/ext4.img
+mount /data/ext4.img /mnt
+mkdir /mnt/foo
+mkdir /mnt/foo/bar
+echo "This is a short file" > /mnt/shortfile.txt
+dd if=/dev/zero of=/mnt/two-k-file.dat bs=1024 count=2
+dd if=/dev/zero of=/mnt/six-k-file.dat bs=1024 count=6
+dd if=/dev/zero of=/mnt/seven-k-file.dat bs=1024 count=7
+dd if=/dev/zero of=/mnt/ten-meg-file.dat bs=1M count=10
+i=0; until [ $i -gt 10000 ]; do mkdir /mnt/foo/dir${i}; i=$(( $i+1 )); done
+umount /mnt
+EOF
diff --git a/filesystem/ext4/testdata/superblock.bin b/filesystem/ext4/testdata/superblock.bin
new file mode 100644
index 00000000..752552be
Binary files /dev/null and b/filesystem/ext4/testdata/superblock.bin differ
diff --git a/filesystem/ext4/util.go b/filesystem/ext4/util.go
new file mode 100644
index 00000000..3e3f1647
--- /dev/null
+++ b/filesystem/ext4/util.go
@@ -0,0 +1,134 @@
+package ext4
+
+import (
+	"fmt"
+	"strings"
+)
+
+const (
+	// KB represents one KB
+	KB int64 = 1024
+	// MB represents one MB
+	MB int64 = 1024 * KB
+	// GB represents one GB
+	GB int64 = 1024 * MB
+	// TB represents one TB
+	TB
int64 = 1024 * GB
+	// PB represents one PB
+	PB int64 = 1024 * TB
+	// XB represents one Exabyte
+	XB int64 = 1024 * PB
+	// these because they are larger than int64 or uint64 can handle
+	// ZB represents one Zettabyte
+	// ZB int64 = 1024 * XB
+	// YB represents one Yottabyte
+	// YB int64 = 1024 * ZB
+	// Ext4MaxSize is maximum size of an ext4 filesystem in bytes
+	// it varies based on the block size and if we are 64-bit or 32-bit mode, but the absolute complete max
+	// is 64KB per block (128 sectors) in 64-bit mode
+	// for a max filesystem size of 1YB (yottabyte)
+	// Ext4MaxSize int64 = YB
+	// if we ever actually care, we will use math/big to do it
+	// var xb, ZB, kb, YB big.Int
+	// kb.SetUint64(1024)
+	// xb.SetUint64(uint64(XB))
+	// ZB.Mul(&xb, &kb)
+	// YB.Mul(&ZB, &kb)
+
+	// Ext4MinSize is minimum size for an ext4 filesystem
+	// it assumes a single block group with:
+	// blocksize = 2 sectors = 1KB
+	// 1 block for boot code
+	// 1 block for superblock
+	// 1 block for block group descriptors
+	// 1 block for block and inode bitmaps and inode table
+	// 1 block for data
+	// total = 5 blocks
+	Ext4MinSize int64 = 5 * int64(SectorSize512)
+
+	// volume
+)
+
+// splitPath splits a path on "/" and returns its non-empty components,
+// so leading, trailing and repeated separators are all ignored.
+func splitPath(p string) []string {
+	parts := strings.Split(p, "/")
+	// eliminate empty parts
+	ret := make([]string, 0)
+	for _, sub := range parts {
+		if sub != "" {
+			ret = append(ret, sub)
+		}
+	}
+	return ret
+}
+
+// convert a string to a byte array, if all characters are valid ascii
+// always pads to the full length provided in padding.
If size is less than the length of the string, it will be truncated +func stringToASCIIBytes(s string, size int) ([]byte, error) { + length := len(s) + b := make([]byte, length) + // convert the name into 11 bytes + r := []rune(s) + // take the first 8 characters + for i := 0; i < length; i++ { + val := int(r[i]) + // we only can handle values less than max byte = 255 + if val > 255 { + return nil, fmt.Errorf("Non-ASCII character in name: %s", s) + } + b[i] = byte(val) + } + if len(b) < size { + // pad with nulls + for i := len(b); i < size; i++ { + b = append(b, 0) + } + } + if len(b) > size { + b = b[:size] + } + return b, nil +} + +var crc16tab = [256]uint16{ + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, + 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, + 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, + 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, + 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, + 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, + 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, + 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, + 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, + 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, + 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, + 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, + 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, + 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, + 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, + 
0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
+	0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
+	0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
+	0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
+	0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
+	0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
+	0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
+	0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
+	0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
+	0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
+	0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
+	0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0}
+
+// crc16 computes a table-driven CRC-16 of bs using the 0x1021 polynomial
+// table above, with a zero initial value and no final XOR
+// (CCITT/XModem family — NOTE(review): confirm which exact variant the
+// ext4 structures being checksummed expect).
+func crc16(bs []byte) (crc uint16) {
+	l := len(bs)
+	for i := 0; i < l; i++ {
+		// shift the high byte out and fold the next input byte in via the table
+		crc = ((crc << 8) & 0xff00) ^ crc16tab[((crc>>8)&0xff)^uint16(bs[i])]
+	}
+
+	return
+}
diff --git a/filesystem/ext4/util_test.go b/filesystem/ext4/util_test.go
new file mode 100644
index 00000000..86a2e15a
--- /dev/null
+++ b/filesystem/ext4/util_test.go
@@ -0,0 +1,36 @@
+package ext4
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestStringToASCIIBytes(t *testing.T) {
+	tests := []struct {
+		s        string
+		size     int
+		expected []byte
+		err      error
+	}{
+		// Test case 1: Empty string
+		{"", 16, []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, nil},
+
+		// Test case 2: Short string
+		{"EXT4", 5, []byte{'E', 'X', 'T', '4', 0}, nil},
+
+		// Test case 3: Long string
+		{"EXT4 filesystem", 8, []byte{'E', 'X', 'T', '4', ' ', 'f', 'i', 'l'}, nil},
+	}
+
+	for _, tt := range tests {
+		t.Run("", func(t *testing.T) {
+			result, err := stringToASCIIBytes(tt.s, tt.size)
+			if err != tt.err {
+				t.Fatalf("stringToASCIIBytes(%q, %d) error = %v; want %v", tt.s, tt.size, err, tt.err)
+			}
+			if !bytes.Equal(result, tt.expected) {
+				t.Errorf("stringToASCIIBytes(%q, %d) = %v; want %v", tt.s, tt.size, result, tt.expected)
+			}
+		})
+	
} +} diff --git a/filesystem/filesystem.go b/filesystem/filesystem.go index fdd35313..2c1acfa6 100644 --- a/filesystem/filesystem.go +++ b/filesystem/filesystem.go @@ -34,4 +34,6 @@ const ( TypeISO9660 // TypeSquashfs is a squashfs filesystem TypeSquashfs + // TypeExt4 is an ext4 compatible filesystem + TypeExt4 ) diff --git a/go.mod b/go.mod index ac2d2aa9..1127e8e9 100644 --- a/go.mod +++ b/go.mod @@ -1,16 +1,19 @@ module github.com/diskfs/go-diskfs -go 1.19 +go 1.21 require ( + github.com/bits-and-blooms/bitset v1.2.1 github.com/djherbis/times v1.6.0 github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab github.com/go-test/deep v1.0.8 github.com/google/uuid v1.3.0 github.com/pierrec/lz4/v4 v4.1.17 github.com/pkg/xattr v0.4.9 + github.com/satori/go.uuid v1.2.0 github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af github.com/ulikunitz/xz v0.5.11 + golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 golang.org/x/sys v0.5.0 ) diff --git a/go.sum b/go.sum index 52b0b56d..c3dc1a03 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/bits-and-blooms/bitset v1.2.1 h1:M+/hrU9xlMp7t4TyTDQW97d3tRPVuKFC6zBEK16QnXY= +github.com/bits-and-blooms/bitset v1.2.1/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -17,6 +19,8 @@ github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= +github.com/satori/go.uuid v1.2.0/go.mod 
h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yfB+If0vjp97vuT74F72r8hfRpP8jLU0=
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -24,11 +28,14 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220615213510-4f61da869c0c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/util/uniqify.go b/util/uniqify.go
new file mode 100644
index 00000000..c091a6ec
--- /dev/null
+++ b/util/uniqify.go
@@ -0,0 +1,13 @@
+package util
+
+func Uniqify[T comparable](s []T) []T {
+	// collect each distinct value into a set; result order is unspecified
+	seen := make(map[T]struct{})
+	for _, item := range s {
+		seen[item] = struct{}{}
+	}
+	out := make([]T, 0, len(seen))
+	for item := range seen {
+		out = append(out, item)
+	}
+	return out
+}