diff --git a/internal/fingerprint/fingerprint.go b/internal/fingerprint/fingerprint.go index 67f889bd..d5e62a63 100644 --- a/internal/fingerprint/fingerprint.go +++ b/internal/fingerprint/fingerprint.go @@ -1,9 +1,11 @@ package fingerprint import ( + "archive/zip" "bufio" "crypto/md5" // #nosec "fmt" + "io" "log" "os" "path/filepath" @@ -109,6 +111,7 @@ func (f FileFingerprint) ToString() string { return fmt.Sprintf("file=%x,%d,%s", f.fingerprint, f.contentLength, path) } + func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) (Fingerprints, error) { log.Println("Warning: Fingerprinting is beta and may not work as expected.") if len(rootPath) == 0 { @@ -132,12 +135,21 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) ( return nil } + // Scan the contents of compressed files + // such as .jar and .nupkg + if shouldUnzip(path) { + fingerprintsZip, err := inMemFingerprintingCompressedContent(path, exclusions) + if err != nil { + return err + } + fingerprints.Entries = append(fingerprints.Entries, fingerprintsZip...) 
+			nbFiles += len(fingerprintsZip)
+		}
 		nbFiles++
 		fingerprint, err := computeMD5(path)
 		if err != nil {
 			return err
 		}
-
 		fingerprints.Append(fingerprint)
 
 		if nbFiles%100 == 0 {
@@ -184,6 +196,12 @@ func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) b
 
 	isSymlink, err := isSymlink(path)
 	if err != nil {
+		// If we get a "not a directory" error, we can assume it's not a symlink
+		// otherwise, we don't know, so we return false
+		return strings.HasSuffix(err.Error(), "not a directory")
+	}
+
+	if isSymlink {
 		return false
 	}
 
@@ -246,3 +264,72 @@ func (f *Fingerprints) ToFile(ouputFile string) error {
 func (f *Fingerprints) Append(fingerprint FileFingerprint) {
 	f.Entries = append(f.Entries, fingerprint)
 }
+
+// filesToUnzip lists the file extensions treated as zip-format archives whose
+// entries are fingerprinted in addition to the archive file itself.
+var filesToUnzip = []string{".jar", ".nupkg"}
+
+// shouldUnzip reports whether the extension of filename is listed in
+// filesToUnzip.
+func shouldUnzip(filename string) bool {
+	ext := filepath.Ext(filename) // loop-invariant, so computed once
+	for _, candidate := range filesToUnzip {
+		if ext == candidate {
+			return true
+		}
+	}
+
+	return false
+}
+
+// inMemFingerprintingCompressedContent opens the zip-format archive at
+// filename and returns one fingerprint for every entry that shouldProcessFile
+// accepts. Each entry is reported under the path "<filename>/<entry name>".
+// Entry content is streamed through the hasher rather than written to disk.
+func inMemFingerprintingCompressedContent(filename string, exclusions []string) ([]FileFingerprint, error) {
+	r, err := zip.OpenReader(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+
+	fingerprints := make([]FileFingerprint, 0, len(r.File))
+
+	for _, f := range r.File {
+		longFileName := fmt.Sprintf("%s/%s", filename, f.Name)
+
+		if !shouldProcessFile(f.FileInfo(), exclusions, longFileName) {
+			continue
+		}
+
+		fingerprint, err := fingerprintZipEntry(f, longFileName)
+		if err != nil {
+			return nil, err
+		}
+		fingerprints = append(fingerprints, fingerprint)
+	}
+
+	return fingerprints, nil
+}
+
+// fingerprintZipEntry hashes the uncompressed content of one archive entry and
+// returns its fingerprint recorded under path. It lives in its own function so
+// the deferred Close runs once per entry, including when hashing fails.
+func fingerprintZipEntry(f *zip.File, path string) (FileFingerprint, error) {
+	rc, err := f.Open()
+	if err != nil {
+		return FileFingerprint{}, err
+	}
+	defer rc.Close()
+
+	hasher := md5.New()
+	if _, err := io.Copy(hasher, rc); err != nil {
+		return FileFingerprint{}, err
+	}
+
+	return FileFingerprint{
+		path:          path,
+		contentLength: int64(f.UncompressedSize64),
+		fingerprint:   hasher.Sum(nil),
+	}, nil
+}
diff --git a/internal/fingerprint/fingerprint_test.go b/internal/fingerprint/fingerprint_test.go
index 8319de9c..a39c7e99 100644
--- a/internal/fingerprint/fingerprint_test.go
+++ b/internal/fingerprint/fingerprint_test.go
@@ -4,6 +4,7 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -162,3 +163,97 @@ func TestFingerprintsToFile(t *testing.T) {
 
 	assert.NoError(t, err)
 }
+
+// TestShouldUnzip checks that only file names ending in a known archive
+// extension are selected for content scanning.
+func TestShouldUnzip(t *testing.T) {
+	tests := []struct {
+		name     string
+		filename string
+		want     bool
+	}{
+		{
+			name:     "Should unzip .jar file",
+			filename: "test.jar",
+			want:     true,
+		},
+		{
+			name:     "Should unzip .nupkg file",
+			filename: "test.nupkg",
+			want:     true,
+		},
+		{
+			name:     "Should not unzip .txt file",
+			filename: "test.txt",
+			want:     false,
+		},
+		{
+			name:     "Should not unzip .go file",
+			filename: "test.go",
+			want:     false,
+		},
+		{
+			name:     "Should pick up .jar file in nested folder",
+			filename: "deep/folder/test.jar",
+			want:     true,
+		},
+		{
+			name:     "Should not unzip file inside a folder named like an archive",
+			filename: "deep/folder.jar/readme.txt",
+			want:     false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := shouldUnzip(tt.filename); got != tt.want {
+				t.Errorf("shouldUnzip() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+// TestInMemFingerprintingCompressedContent walks a testdata directory that
+// contains an archive and checks the total number of fingerprints and that the
+// last entry reported ends with the archive's own file name.
+func TestInMemFingerprintingCompressedContent(t *testing.T) {
+	tests := []struct {
+		name     string
+		path     string
+		expected int
+		suffix   string
+	}{
+		{
+			name:     "Jar",
+			path:     "testdata/zipfile/jar",
+			expected: 5,
+			suffix:   "log4j:log4j-api-2.18.0.jar",
+		},
+		{
+			name:     "Nupkg",
+			path:     "testdata/zipfile/nupkg",
+			expected: 22,
+			suffix:   "newtonsoft.json.13.0.3.nupkg",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			fingerprinter := NewFingerprinter()
+			fingerprints, err := fingerprinter.FingerprintFiles(tt.path, []string{})
+			// Stop the subtest on a failed precondition: indexing Entries
+			// below would panic on an empty result instead of failing cleanly.
+			if !assert.NoError(t, err) {
+				return
+			}
+			assert.NotNil(t, fingerprints)
+			assert.NotEmpty(t, fingerprints)
+			assert.Equal(t, tt.expected, fingerprints.Len())
+			if !assert.NotEmpty(t, fingerprints.Entries) {
+				return
+			}
+			lastRow := fingerprints.Entries[len(fingerprints.Entries)-1]
+			assert.True(t, strings.HasSuffix(lastRow.ToString(), tt.suffix))
+		})
+	}
+}
diff --git a/internal/fingerprint/testdata/zipfile/jar/log4j:log4j-api-2.18.0.jar b/internal/fingerprint/testdata/zipfile/jar/log4j:log4j-api-2.18.0.jar
new file mode 100644
index 00000000..b4e933f7
Binary files /dev/null
and b/internal/fingerprint/testdata/zipfile/jar/log4j:log4j-api-2.18.0.jar differ diff --git a/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg b/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg new file mode 100644 index 00000000..5829e3da Binary files /dev/null and b/internal/fingerprint/testdata/zipfile/nupkg/newtonsoft.json.13.0.3.nupkg differ