From e33fe1e4211c95b4e9bd7fbde6bdc6a00d646aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emil=20W=C3=A5reus?= Date: Thu, 14 Dec 2023 11:41:03 +0100 Subject: [PATCH] replace md5 with blake3 for fingerprinting --- go.mod | 2 ++ go.sum | 4 ++++ internal/fingerprint/fingerprint.go | 30 ++++++++++++++++-------- internal/fingerprint/fingerprint_test.go | 14 +++++------ 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index c07cb99e..463f78b6 100644 --- a/go.mod +++ b/go.mod @@ -32,6 +32,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.18 // indirect @@ -61,4 +62,5 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + lukechampine.com/blake3 v1.2.1 // indirect ) diff --git a/go.sum b/go.sum index 83a25db3..a5820807 100644 --- a/go.sum +++ b/go.sum @@ -182,6 +182,8 @@ github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1 github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -641,6 +643,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= +lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= diff --git a/internal/fingerprint/fingerprint.go b/internal/fingerprint/fingerprint.go index 4bbe837b..df4a6ec0 100644 --- a/internal/fingerprint/fingerprint.go +++ b/internal/fingerprint/fingerprint.go @@ -3,7 +3,6 @@ package fingerprint import ( "archive/zip" "bufio" - "crypto/md5" // #nosec "errors" "fmt" "io" @@ -14,6 +13,7 @@ import ( "github.com/debricked/cli/internal/file" "github.com/debricked/cli/internal/tui" + "lukechampine.com/blake3" ) var EXCLUDED_EXT = []string{ @@ -43,6 +43,15 @@ var EXCLUDED_FILES = []string{ "thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile", } +var HASH_SIZE = 16 + +func newHasher() *blake3.Hasher { + return blake3.New( + HASH_SIZE, + nil, + ) +} + const ( OutputFileNameFingerprints = "debricked.fingerprints.wfp" ) @@ -132,7 +141,7 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) ( return err } - fingerprintsZip, err := computeMD5ForFileAndZip(fileInfo, path, exclusions) + fingerprintsZip, err := computeHashForFileAndZip(fileInfo, path, exclusions) if err != nil { return err } @@ -162,7 +171,7 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) ( return fingerprints, err } -func computeMD5ForFileAndZip(fileInfo os.FileInfo, path string, exclusions []string) ([]FileFingerprint, error) { +func computeHashForFileAndZip(fileInfo os.FileInfo, path string, exclusions []string) ([]FileFingerprint, error) { if !shouldProcessFile(fileInfo, exclusions, path) { return nil, nil } @@ -182,8 +191,7 @@ func computeMD5ForFileAndZip(fileInfo os.FileInfo, path string, exclusions []str fingerprints = append(fingerprints, fingerprintsZip...) } - // Compute the MD5 for the file - fingerprint, err := computeMD5ForFile(path) + fingerprint, err := computeHashForFile(path) if err != nil { return nil, err } @@ -229,15 +237,15 @@ func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) b return !isSymlink } -func computeMD5ForFile(filename string) (FileFingerprint, error) { +func computeHashForFile(filename string) (FileFingerprint, error) { data, err := os.ReadFile(filename) if err != nil { return FileFingerprint{}, err } - hash := md5.New() // #nosec + hasher := newHasher() - if _, err := hash.Write(data); err != nil { + if _, err := hasher.Write(data); err != nil { return FileFingerprint{}, err } @@ -250,7 +258,7 @@ func computeMD5ForFile(filename string) (FileFingerprint, error) { return FileFingerprint{ path: filename, contentLength: contentLength, - fingerprint: hash.Sum(nil), + fingerprint: hasher.Sum(nil), }, nil } @@ -319,7 +327,9 @@ func inMemFingerprintingCompressedContent(filename string, exclusions []string) if err != nil { return nil, err } - hasher := md5.New() // #nosec + + hasher := newHasher() + _, err = io.Copy(hasher, rc) // #nosec if err != nil { rc.Close() diff --git a/internal/fingerprint/fingerprint_test.go b/internal/fingerprint/fingerprint_test.go index 01f44cbb..e68918c1 100644 --- a/internal/fingerprint/fingerprint_test.go +++ b/internal/fingerprint/fingerprint_test.go @@ -156,7 +156,7 @@ func TestFingerprintFiles(t *testing.T) { assert.NotNil(t, fingerprints) assert.NotEmpty(t, fingerprints) assert.Equal(t, 2, fingerprints.Len()) - assert.Equal(t, "file=72214db4e1e543018d1bafe86ea3b444,21,testdata/fingerprinter/testfile.py", fingerprints.Entries[0].ToString()) + assert.Equal(t, "file=634c5485de8e22b27094affadd8a6e3b,21,testdata/fingerprinter/testfile.py", fingerprints.Entries[0].ToString()) // Test no file fingerprints, err = fingerprinter.FingerprintFiles("", []string{}) @@ -198,14 +198,14 @@ func TestFileFingerprintToString(t *testing.T) { func TestComputeMD5(t *testing.T) { // Test file not found - _, err := computeMD5ForFile("testdata/fingerprinter/testfile-not-found.py") + _, err := computeHashForFile("testdata/fingerprinter/testfile-not-found.py") assert.Error(t, err) // Test file found - entry, err := computeMD5ForFile("testdata/fingerprinter/testfile.py") + entry, err := computeHashForFile("testdata/fingerprinter/testfile.py") assert.NoError(t, err) entryS := fmt.Sprintf("%x", entry.fingerprint) - assert.Equal(t, "72214db4e1e543018d1bafe86ea3b444", entryS) + assert.Equal(t, "634c5485de8e22b27094affadd8a6e3b", entryS) } func TestFingerprintsToFile(t *testing.T) { @@ -350,7 +350,7 @@ func TestInMemFingerprintingCompressedContent(t *testing.T) { } } -func TestComputeMD5ForFile(t *testing.T) { +func TestComputeHashForFile(t *testing.T) { tests := []struct { name string file string @@ -366,9 +366,9 @@ func TestComputeMD5ForFile(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - _, err := computeMD5ForFile(tt.file) + _, err := computeHashForFile(tt.file) if (err != nil) != tt.wantErr { - t.Errorf("computeMD5ForFile() error = %v, wantErr %v", err, tt.wantErr) + t.Errorf("computeHashForFile() error = %v, wantErr %v", err, tt.wantErr) } }) }