Skip to content

Commit

Permalink
improve fingerprinting logic with platform independence and include i…
Browse files Browse the repository at this point in the history
…t in scans
  • Loading branch information
emilwareus committed Oct 10, 2023
1 parent 1a3c8b8 commit 234716d
Show file tree
Hide file tree
Showing 11 changed files with 295 additions and 186 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ jobs:
os: [ 'ubuntu-latest', 'windows-latest', 'macos-latest' ]
runs-on: ${{ matrix.os }}
steps:
- name: Set git to use LF
run: |
git config --global core.autocrlf input
git config --global core.eol lf
- uses: actions/checkout@v3

- name: Set up Go
Expand Down
8 changes: 5 additions & 3 deletions internal/cmd/files/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ const (
)

func NewFingerprintCmd(fingerprinter file.IFingerprint) *cobra.Command {

short := fmt.Sprintf("Fingerprint files for identification in a given path and writes it to %s. [beta feature]", file.OutputFileNameFingerprints)
long := fmt.Sprintf("Fingerprint files for identification in a given path and writes it to %s. [beta feature]\nThis hashes all files and matches them against the Debricked knowledge base.", file.OutputFileNameFingerprints)
cmd := &cobra.Command{
Use: "fingerprint [path]",
Short: "Fingerprint files for identification in a given path and writes it to " + file.OutputFileNameFingerprints,
Long: `Fingerprint files for identification in a given path.
This hashes all files and matches them against the Debricked knowledge base.`,
Short: short,
Long: long,
PreRun: func(cmd *cobra.Command, _ []string) {
_ = viper.BindPFlags(cmd.Flags())
},
Expand Down
6 changes: 3 additions & 3 deletions internal/cmd/scan/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const (
IntegrationFlag = "integration"
ExclusionFlag = "exclusion"
NoResolveFlag = "no-resolve"
NoFingerprintFlag = "no-fingerprint"
FingerprintFlag = "fingerprint"
PassOnTimeOut = "pass-on-timeout"
)

Expand Down Expand Up @@ -84,7 +84,7 @@ $ debricked scan . `+exampleFlags)
cmd.Flags().BoolVarP(&passOnDowntime, PassOnTimeOut, "p", false, "pass scan if there is a service access timeout")
cmd.Flags().BoolVar(&noResolve, NoResolveFlag, false, `disables resolution of manifest files that lack lock files. Resolving manifest files enables more accurate dependency scanning since the whole dependency tree will be analysed.
For example, if there is a "go.mod" in the target path, its dependencies are going to get resolved onto a lock file, and latter scanned.`)
cmd.Flags().BoolVar(&noFingerprint, NoFingerprintFlag, true, "disables fingerprinting for undeclared component identification. Can be run as a standalone command with more granular options.")
cmd.Flags().BoolVar(&noFingerprint, FingerprintFlag, false, "enables fingerprinting for undeclared component identification. Can be run as a standalone command [files fingerprint] with more granular options. [beta feature]")
viper.MustBindEnv(RepositoryFlag)
viper.MustBindEnv(CommitFlag)
viper.MustBindEnv(BranchFlag)
Expand All @@ -105,7 +105,7 @@ func RunE(s *scan.IScanner) func(_ *cobra.Command, args []string) error {
options := scan.DebrickedOptions{
Path: path,
Resolve: !viper.GetBool(NoResolveFlag),
Fingerprint: !viper.GetBool(NoFingerprintFlag),
Fingerprint: viper.GetBool(FingerprintFlag),
Exclusions: viper.GetStringSlice(ExclusionFlag),
RepositoryName: viper.GetString(RepositoryFlag),
CommitName: viper.GetString(CommitFlag),
Expand Down
4 changes: 1 addition & 3 deletions internal/file/default_exclusion.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ func DefaultExclusionsFingerprint() []string {
output = append(output, filepath.Join("**", pattern, "**"))
}

for _, pattern := range EXCLUDED_DIRS_FINGERPRINT_RAW {
output = append(output, pattern)
}
output = append(output, EXCLUDED_DIRS_FINGERPRINT_RAW...)

return output
}
115 changes: 84 additions & 31 deletions internal/file/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ import (
"bufio"
"crypto/md5" // #nosec
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

"github.com/debricked/cli/internal/tui"
)

var EXCLUDED_EXT = []string{
Expand Down Expand Up @@ -43,19 +45,35 @@ const (

// isExcludedFile reports whether filename should be skipped. The extension,
// exact-name and suffix checks are tried in that order, and the first one
// that matches decides the result.
func isExcludedFile(filename string) bool {
	switch {
	case isExcludedByExtension(filename):
		return true
	case isExcludedByFilename(filename):
		return true
	default:
		return isExcludedByEnding(filename)
	}
}

// isExcludedByExtension reports whether the extension of the lowercased
// filename (as returned by filepath.Ext, including the leading dot) is equal
// to one of the entries in EXCLUDED_EXT. Because the input is lowercased
// before comparison, only lowercase entries in EXCLUDED_EXT can ever match.
func isExcludedByExtension(filename string) bool {
	// The extension does not depend on the loop variable, so it is computed
	// once up front instead of once per EXCLUDED_EXT entry.
	ext := filepath.Ext(strings.ToLower(filename))
	for _, format := range EXCLUDED_EXT {
		if ext == format {
			return true
		}
	}

	return false
}

// isExcludedByFilename reports whether the lowercased filename is exactly
// equal to one of the entries in ECLUDED_FILES.
//
// NOTE(review): the comparison is made against the whole argument, not its
// base name — confirm whether callers pass a bare file name or a full path.
func isExcludedByFilename(filename string) bool {
	lowered := strings.ToLower(filename)
	for _, candidate := range ECLUDED_FILES {
		if candidate == lowered {
			return true
		}
	}

	return false
}

func isExcludedByEnding(filename string) bool {
filenameLower := strings.ToLower(filename)
for _, ending := range EXCLUDED_FILE_ENDINGS {
if strings.HasSuffix(filenameLower, ending) {
return true
Expand All @@ -70,10 +88,13 @@ type IFingerprint interface {
}

// Fingerprinter holds the state needed to fingerprint files; at present that
// is only the spinner manager.
type Fingerprinter struct {
// spinnerManager is the tui spinner manager owned by this Fingerprinter.
// NOTE(review): presumably used to report progress while files are being
// processed — confirm against FingerprintFiles.
spinnerManager tui.ISpinnerManager
}

func NewFingerprinter() *Fingerprinter {
return &Fingerprinter{}
return &Fingerprinter{
spinnerManager: tui.NewSpinnerManager("Fingerprinting", "0"),
}
}

type FileFingerprint struct {
Expand All @@ -83,61 +104,93 @@ type FileFingerprint struct {
}

func (f FileFingerprint) ToString() string {
return fmt.Sprintf("file=%x,%d,%s", f.fingerprint, f.contentLength, f.path)
}
// Replace backslashes with forward slashes to make the path platform independent
path := strings.ReplaceAll(f.path, "\\", "/")

return fmt.Sprintf("file=%x,%d,%s", f.fingerprint, f.contentLength, path)
}
func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) (Fingerprints, error) {

log.Println("Warning: Fingerprinting is beta and may not work as expected.")
if len(rootPath) == 0 {
rootPath = filepath.Base("")
}

fingerprints := Fingerprints{}

// Traverse files to find dependency file groups
err := filepath.Walk(
rootPath,
func(path string, fileInfo os.FileInfo, err error) error {
if err != nil {
return err
}
if !fileInfo.IsDir() && !excluded(exclusions, path) {

if isExcludedFile(path) {
return nil
}
f.spinnerManager.Start()
spinnerMessage := "files processed"
spinner := f.spinnerManager.AddSpinner(spinnerMessage)

fingerprint, err := computeMD5(path)
nbFiles := 0

// Skip directories, fileInfo.IsDir() is not reliable enough
if err != nil && !strings.Contains(err.Error(), "is a directory") {
return err
} else if err == nil {
fingerprints.Append(fingerprint)
}
err := filepath.Walk(rootPath, func(path string, fileInfo os.FileInfo, err error) error {
nbFiles++

}
if err != nil {
return err
}

if !shouldProcessFile(fileInfo, exclusions, path) {
return nil
},
)
}

fingerprint, err := computeMD5(path)
if err != nil {
return err
}

fingerprints.Append(fingerprint)

if nbFiles%100 == 0 {
f.spinnerManager.SetSpinnerMessage(spinner, spinnerMessage, fmt.Sprintf("%d", nbFiles))
}

return nil
})

f.spinnerManager.SetSpinnerMessage(spinner, spinnerMessage, fmt.Sprintf("%d", nbFiles))

if err != nil {
spinner.Error()
} else {
spinner.Complete()
}

f.spinnerManager.Stop()

return fingerprints, err
}

// shouldProcessFile reports whether the entry at path should be processed.
// It returns false when fileInfo describes a directory, when
// excluded(exclusions, path) reports true, or when isExcludedFile(path)
// reports true; the checks run in that order and stop at the first hit.
func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) bool {
	skip := fileInfo.IsDir() || excluded(exclusions, path) || isExcludedFile(path)

	return !skip
}

func computeMD5(filename string) (FileFingerprint, error) {
file, err := os.Open(filename)
data, err := os.ReadFile(filename)
if err != nil {
return FileFingerprint{}, err
}
defer file.Close()

hash := md5.New() // #nosec
if _, err := io.Copy(hash, file); err != nil {

if _, err := hash.Write(data); err != nil {
return FileFingerprint{}, err
}

contentLength, err := file.Seek(0, 2)
contentLength := int64(len(data))

if err != nil {
return FileFingerprint{}, err
}
Expand Down
11 changes: 11 additions & 0 deletions internal/file/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ func TestFingerprintFiles(t *testing.T) {

}

// TestFingerprintFilesBackslash checks that ToString renders a path written
// with backslash separators using forward slashes, alongside the hex-encoded
// fingerprint and the content length.
func TestFingerprintFilesBackslash(t *testing.T) {
	const want = "file=72214db4e1e543018d1bafe86ea3b4444444444444,21,testdata/fingerprinter/testfile.py"

	input := FileFingerprint{
		path:          "testdata\\fingerprinter\\testfile.py",
		contentLength: 21,
		fingerprint:   []byte{114, 33, 77, 180, 225, 229, 67, 1, 141, 27, 175, 232, 110, 163, 180, 68, 68, 68, 68, 68, 68},
	}

	assert.Equal(t, want, input.ToString())
}

func TestFileFingerprintToString(t *testing.T) {
fileFingerprint := FileFingerprint{path: "path", contentLength: 10, fingerprint: []byte("fingerprint")}
assert.Equal(t, "file=66696e6765727072696e74,10,path", fileFingerprint.ToString())
Expand Down
8 changes: 4 additions & 4 deletions internal/resolution/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func (scheduler *Scheduler) Schedule(jobs []job.IJob) (IResolution, error) {
scheduler.queue = make(chan queueItem, len(jobs))
scheduler.waitGroup.Add(len(jobs))

scheduler.spinnerManager = tui.NewSpinnerManager()
scheduler.spinnerManager = tui.NewSpinnerManager("Resolving", "waiting for worker")

for w := 1; w <= scheduler.workers; w++ {
go scheduler.worker()
Expand Down Expand Up @@ -75,16 +75,16 @@ func (scheduler *Scheduler) worker() {
func (scheduler *Scheduler) updateStatus(item queueItem) {
for {
msg := <-item.job.ReceiveStatus()
tui.SetSpinnerMessage(item.spinner, item.job.GetFile(), msg)
scheduler.spinnerManager.SetSpinnerMessage(item.spinner, item.job.GetFile(), msg)
}
}

func (scheduler *Scheduler) finish(item queueItem) {
if item.job.Errors().HasError() {
tui.SetSpinnerMessage(item.spinner, item.job.GetFile(), "failed")
scheduler.spinnerManager.SetSpinnerMessage(item.spinner, item.job.GetFile(), "failed")
item.spinner.Error()
} else {
tui.SetSpinnerMessage(item.spinner, item.job.GetFile(), "done")
scheduler.spinnerManager.SetSpinnerMessage(item.spinner, item.job.GetFile(), "done")
item.spinner.Complete()
}
}
29 changes: 23 additions & 6 deletions internal/scan/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,24 +121,41 @@ func (dScanner *DebrickedScanner) Scan(o IOptions) error {
return nil
}

func (dScanner *DebrickedScanner) scan(options DebrickedOptions, gitMetaObject git.MetaObject) (*upload.UploadResult, error) {
func (dScanner *DebrickedScanner) scanResolve(options DebrickedOptions) error {
if options.Resolve {
_, resErr := dScanner.resolver.Resolve([]string{options.Path}, options.Exclusions)
if resErr != nil {
return nil, resErr
return resErr
}
}

return nil
}

func (dScanner *DebrickedScanner) scanFingerprint(options DebrickedOptions) error {
if options.Fingerprint {
fingerprints, err := dScanner.fingerprint.FingerprintFiles(options.Path, file.DefaultExclusionsFingerprint())
if err != nil {
return nil, err
return err
}
err = fingerprints.ToFile(file.OutputFileNameFingerprints)
if err != nil {
return nil, err
}

return err
}

return nil
}

func (dScanner *DebrickedScanner) scan(options DebrickedOptions, gitMetaObject git.MetaObject) (*upload.UploadResult, error) {

err := dScanner.scanResolve(options)
if err != nil {
return nil, err
}

err = dScanner.scanFingerprint(options)
if err != nil {
return nil, err
}

fileGroups, err := dScanner.finder.GetGroups(options.Path, options.Exclusions, false, file.StrictAll)
Expand Down
Loading

0 comments on commit 234716d

Please sign in to comment.