Expose option to scan without indexing #3360

Open · wants to merge 1 commit into main
3 changes: 2 additions & 1 deletion cmd/syft/internal/commands/scan.go
@@ -224,7 +224,8 @@ func getSource(ctx context.Context, opts *options.Catalog, userInput string, sou
}).
WithBasePath(opts.Source.BasePath).
WithSources(sources...).
WithDefaultImagePullSource(opts.Source.Image.DefaultPullSource)
WithDefaultImagePullSource(opts.Source.Image.DefaultPullSource).
WithUnindexed(opts.Unindexed)

var err error
var platform *image.Platform
4 changes: 4 additions & 0 deletions cmd/syft/internal/options/catalog.go
@@ -53,6 +53,7 @@ type Catalog struct {
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Source sourceConfig `yaml:"source" json:"source" mapstructure:"source"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Unindexed bool `yaml:"unindexed" json:"unindexed" mapstructure:"unindexed"`

// configuration for inclusion of unknown information within elements
Unknowns unknownsConfig `yaml:"unknowns" mapstructure:"unknowns"`
@@ -228,6 +229,9 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) {

flags.StringVarP(&cfg.Source.BasePath, "base-path", "",
"base directory for scanning, no links will be followed above this directory, and all paths will be reported relative to this directory")

flags.BoolVarP(&cfg.Unindexed, "unindexed", "",
"whether to index the file system or not, indexing can improve scan times but incurs a memory overhead, default false.")
Contributor:
Should we expose a CLI option for this? I feel like this is more of a low-level option, which hints at leaving it only in the config (and overridable via env vars too).

}

func (cfg *Catalog) DescribeFields(descriptions fangs.FieldDescriptionSet) {
5 changes: 5 additions & 0 deletions syft/get_source_config.go
@@ -26,6 +26,11 @@ func (c *GetSourceConfig) WithAlias(alias source.Alias) *GetSourceConfig {
return c
}

func (c *GetSourceConfig) WithUnindexed(unindexed bool) *GetSourceConfig {
c.SourceProviderConfig = c.SourceProviderConfig.WithUnindexed(unindexed)
return c
}

func (c *GetSourceConfig) WithRegistryOptions(registryOptions *image.RegistryOptions) *GetSourceConfig {
c.SourceProviderConfig = c.SourceProviderConfig.WithRegistryOptions(registryOptions)
return c
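For context, a minimal sketch of how a library consumer could opt into the unindexed resolver once this lands, using the existing syft.GetSource / DefaultGetSourceConfig entry points plus the WithUnindexed option added here (the path and error handling are illustrative):

```go
package main

import (
	"context"
	"fmt"

	"github.com/anchore/syft/syft"
)

func main() {
	// WithUnindexed is the option added by this PR; everything else is the
	// existing source-construction flow.
	cfg := syft.DefaultGetSourceConfig().WithUnindexed(true)

	src, err := syft.GetSource(context.Background(), "./some/dir", cfg)
	if err != nil {
		panic(err)
	}
	fmt.Println(src.Describe().ID)
}
```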
126 changes: 92 additions & 34 deletions syft/internal/fileresolver/unindexed_directory.go
@@ -18,30 +18,32 @@ import (
"github.com/mitchellh/go-homedir"
"github.com/spf13/afero"

stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
syftFile "github.com/anchore/syft/syft/file"
)

var _ file.Resolver = (*UnindexedDirectory)(nil)
var _ file.WritableResolver = (*UnindexedDirectory)(nil)
var _ syftFile.Resolver = (*UnindexedDirectory)(nil)
var _ syftFile.WritableResolver = (*UnindexedDirectory)(nil)

type UnindexedDirectory struct {
ls afero.Lstater
lr afero.LinkReader
base string
dir string
fs afero.Fs
ls afero.Lstater
lr afero.LinkReader
base string
dir string
fs afero.Fs
pathFilters []PathIndexVisitor
}

func NewFromUnindexedDirectory(dir string) file.WritableResolver {
func NewFromUnindexedDirectory(dir string) syftFile.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, "")
}

func NewFromRootedUnindexedDirectory(dir string, base string) file.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, base)
func NewFromRootedUnindexedDirectory(dir string, base string, pathVisitors ...PathIndexVisitor) syftFile.WritableResolver {
return NewFromUnindexedDirectoryFS(afero.NewOsFs(), dir, base, pathVisitors...)
}

func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string) file.WritableResolver {
func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string, pathVisitors ...PathIndexVisitor) syftFile.WritableResolver {
ls, ok := fs.(afero.Lstater)
if !ok {
panic(fmt.Sprintf("unable to get afero.Lstater interface from: %+v", fs))
@@ -70,15 +72,16 @@ func NewFromUnindexedDirectoryFS(fs afero.Fs, dir string, base string) file.Writ
}
}
return UnindexedDirectory{
base: base,
dir: dir,
fs: fs,
ls: ls,
lr: lr,
base: base,
dir: dir,
fs: fs,
ls: ls,
lr: lr,
pathFilters: pathVisitors,
}
}

func (u UnindexedDirectory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
func (u UnindexedDirectory) FileContentsByLocation(location syftFile.Location) (io.ReadCloser, error) {
p := u.absPath(u.scrubInputPath(location.RealPath))
f, err := u.fs.Open(p)
if err != nil {
@@ -149,11 +152,11 @@ func (u UnindexedDirectory) absPath(p string) string {

// - full symlink resolution should be performed on all requests
// - only returns locations to files (NOT directories)
func (u UnindexedDirectory) FilesByPath(paths ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) FilesByPath(paths ...string) (out []syftFile.Location, _ error) {
return u.filesByPath(true, false, paths...)
}

func (u UnindexedDirectory) filesByPath(resolveLinks bool, includeDirs bool, paths ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) filesByPath(resolveLinks bool, includeDirs bool, paths ...string) (out []syftFile.Location, _ error) {
// sort here for stable output
sort.Strings(paths)
nextPath:
Expand Down Expand Up @@ -183,11 +186,11 @@ nextPath:
// - full symlink resolution should be performed on all requests
// - if multiple paths to the same file are found, the best single match should be returned
// - only returns locations to files (NOT directories)
func (u UnindexedDirectory) FilesByGlob(patterns ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) FilesByGlob(patterns ...string) (out []syftFile.Location, _ error) {
return u.filesByGlob(true, false, patterns...)
}

func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, patterns ...string) (out []file.Location, _ error) {
func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, patterns ...string) (out []syftFile.Location, _ error) {
f := unindexedDirectoryResolverFS{
u: u,
}
@@ -206,14 +209,62 @@ func (u UnindexedDirectory) filesByGlob(resolveLinks bool, includeDirs bool, pat
return u.filesByPath(resolveLinks, includeDirs, paths...)
}

func (u UnindexedDirectory) FilesByMIMEType(_ ...string) ([]file.Location, error) {
panic("FilesByMIMEType unsupported")
// FilesByMIMEType fetches all files that match the provided MIME types.
// This requires walking the file tree from u.base and checking the MIME type of each file encountered.
// Errors encountered while walking are ignored unless a filterFn signals that the directory should be skipped.
// TODO: afero.Walk will read all files in a single directory into memory, providing the same lexical ordering
// guarantees that golang's filepath.Walk implementation provides. However, when a single directory contains
// many files, this could cause Syft to run OOM. We could consider using a custom walk function in future
// which uses Readdir with a hardcoded value N >= 1 so that entire directories aren't read into memory each time.
func (u UnindexedDirectory) FilesByMIMEType(types ...string) ([]syftFile.Location, error) {
uniqueLocations := make([]syftFile.Location, 0)
err := afero.Walk(u.fs, u.absPath(u.base), func(p string, fi fs.FileInfo, walkErr error) error {
// Ignore any path for which a filter function returns true
for _, filterFn := range u.pathFilters {
if filterFn == nil {
continue
}

if filterErr := filterFn(u.base, p, fi, walkErr); filterErr != nil {
if errors.Is(filterErr, fs.SkipDir) {
// signal to walk() to skip this directory entirely (even if we're processing a file)
return filterErr
}
// skip this path but don't affect walk() trajectory
return nil
}
}

// If we get here, then the MIME type of the file should be checked
mimeType := stereoscopeFile.NewMetadataFromPath(p, fi).MIMEType
for _, mType := range types {
if mimeType == mType {
// Tidy the path, same as AllLocations
p = strings.TrimPrefix(p, u.dir)
if p == "" {
return nil
}
p = strings.TrimPrefix(p, "/")
uniqueLocations = append(uniqueLocations, syftFile.NewLocation(p))
}
}

// Continue to walk()
return nil
})

if err != nil {
log.Debug(err)
return nil, err
}

return uniqueLocations, nil
}

// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
func (u UnindexedDirectory) RelativeFileByPath(l file.Location, p string) *file.Location {
p = path.Clean(path.Join(l.RealPath, p))
func (u UnindexedDirectory) RelativeFileByPath(l syftFile.Location, p string) *syftFile.Location {
p = path.Clean(p)
locs, err := u.filesByPath(true, false, p)
if err != nil || len(locs) == 0 {
return nil
@@ -228,8 +279,8 @@ func (u UnindexedDirectory) RelativeFileByPath(l file.Location, p string) *file.

// - NO symlink resolution should be performed on results
// - returns locations for any file or directory
func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Location {
out := make(chan file.Location)
func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan syftFile.Location {
out := make(chan syftFile.Location)
errWalkCanceled := fmt.Errorf("walk canceled")
go func() {
defer close(out)
Expand All @@ -240,7 +291,7 @@ func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Locati
}
p = strings.TrimPrefix(p, "/")
select {
case out <- file.NewLocation(p):
case out <- syftFile.NewLocation(p):
return nil
case <-ctx.Done():
return errWalkCanceled
@@ -240,7 +291,7 @@ func (u UnindexedDirectory) AllLocations(ctx context.Context) <-chan file.Locati
return out
}

func (u UnindexedDirectory) FileMetadataByLocation(_ file.Location) (file.Metadata, error) {
panic("FileMetadataByLocation unsupported")
func (u UnindexedDirectory) FileMetadataByLocation(loc syftFile.Location) (syftFile.Metadata, error) {
p := u.absPath(u.scrubInputPath(loc.RealPath))
finfo, err := u.fs.Stat(p)
if err != nil {
return syftFile.Metadata{}, err
}

metadata := stereoscopeFile.NewMetadataFromPath(p, finfo)
return metadata, nil
}

func (u UnindexedDirectory) Write(location file.Location, reader io.Reader) error {
func (u UnindexedDirectory) Write(location syftFile.Location, reader io.Reader) error {
filePath := location.RealPath
if path.IsAbs(filePath) {
filePath = filePath[1:]
@@ -266,7 +324,7 @@ func (u UnindexedDirectory) Write(location file.Location, reader io.Reader) erro
return afero.WriteReader(u.fs, absPath, reader)
}

func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *file.Location {
func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *syftFile.Location {
filePath = path.Clean(filePath)

virtualPath := filePath
@@ -287,7 +345,7 @@ func (u UnindexedDirectory) newLocation(filePath string, resolveLinks bool) *fil
}
}

l := file.NewVirtualLocation(realPath, virtualPath)
l := syftFile.NewVirtualLocation(realPath, virtualPath)
return &l
}

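As an aside on the TODO in FilesByMIMEType above: a bounded-memory replacement for afero.Walk could read directory entries in fixed-size batches via Readdir(n) rather than loading each listing whole. A rough sketch under those assumptions follows (names and the batch size are illustrative, and note it gives up the lexical ordering that afero.Walk / filepath.Walk guarantee, which is one reason to leave it as future work):

```go
package fileresolver

import (
	"io"
	"io/fs"
	"path/filepath"

	"github.com/spf13/afero"
)

const readDirBatchSize = 64 // upper bound on FileInfo entries held per Readdir call

// walkBatched visits every file under root without materializing full
// directory listings, reading at most readDirBatchSize entries at a time.
func walkBatched(afs afero.Fs, root string, visit func(path string, fi fs.FileInfo) error) error {
	d, err := afs.Open(root)
	if err != nil {
		return err
	}
	defer d.Close()

	for {
		entries, rdErr := d.Readdir(readDirBatchSize)
		for _, fi := range entries {
			p := filepath.Join(root, fi.Name())
			if err := visit(p, fi); err != nil {
				return err
			}
			if fi.IsDir() {
				if err := walkBatched(afs, p, visit); err != nil {
					return err
				}
			}
		}
		if rdErr == io.EOF {
			return nil // directory exhausted
		}
		if rdErr != nil {
			return rdErr
		}
	}
}
```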
49 changes: 49 additions & 0 deletions syft/internal/fileresolver/unindexed_directory_test.go
@@ -1271,6 +1271,55 @@ func Test_WritableUnindexedDirectoryResolver(t *testing.T) {
require.Equal(t, c, string(bytes))
}

func Test_UnindexedDirectoryResolver_FilesByMIMEType(t *testing.T) {
tests := []struct {
fixturePath string
mimeType string
expectedPaths *strset.Set
}{
{
fixturePath: "./test-fixtures/image-simple",
mimeType: "text/plain",
expectedPaths: strset.New("file-1.txt", "file-2.txt", "target/really/nested/file-3.txt", "Dockerfile"),
},
}
for _, test := range tests {
t.Run(test.fixturePath, func(t *testing.T) {
resolver := NewFromUnindexedDirectory(test.fixturePath)
locations, err := resolver.FilesByMIMEType(test.mimeType)
assert.NoError(t, err)
assert.Equal(t, test.expectedPaths.Size(), len(locations))
for _, l := range locations {
assert.True(t, test.expectedPaths.Has(l.RealPath), "does not have path %q", l.RealPath)
}
})
}
}

func Test_UnindexedDirectoryResolver_RelativeFileByPath(t *testing.T) {
cases := []struct {
name string
root string
searchFile string
expected *strset.Set
}{
{
name: "should find nested file from root",
root: "./test-fixtures/image-simple",
searchFile: "target/really/nested/file-3.txt",
expected: strset.New("target/really/nested/file-3.txt"),
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
resolver := NewFromUnindexedDirectory(c.root)
rootLoc := file.NewLocation(c.root)
loc := resolver.RelativeFileByPath(rootLoc, c.searchFile)
assert.True(t, c.expected.Has(loc.Coordinates.RealPath), "does not have path %q", loc.RealPath)
})
}
}

func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) {
done := make(chan bool)
go func() {
24 changes: 14 additions & 10 deletions syft/source/directorysource/directory_source.go
@@ -21,16 +21,17 @@ import (
var _ source.Source = (*directorySource)(nil)

type Config struct {
Path string
Base string
Exclude source.ExcludeConfig
Alias source.Alias
Path string
Base string
Exclude source.ExcludeConfig
Alias source.Alias
Unindexed bool
}

type directorySource struct {
id artifact.ID
config Config
resolver *fileresolver.Directory
resolver file.Resolver
mutex *sync.Mutex
}

@@ -145,12 +146,15 @@ func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
// this should be the only file resolver that might have overlap with where files are cached
exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)

res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
if s.config.Unindexed {
s.resolver = fileresolver.NewFromRootedUnindexedDirectory(s.config.Path, s.config.Base)
Contributor:
I think we'll need to modify this resolver to accept exclusions -- I think it would be surprising to users to have different exclusion behavior based off of changing only an index related configuration. If this is too difficult or infeasible then we should at least warn the user with log.Warn().

} else {
res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
}
s.resolver = res
}

s.resolver = res
}

return s.resolver, nil
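Regarding the exclusion concern raised in the comment above: since this PR already extends NewFromRootedUnindexedDirectory to accept PathIndexVisitor arguments, one possible follow-up is to forward the same exclusionFunctions the indexed path builds, so --exclude behaves consistently regardless of indexing mode. This is a sketch of how the hunk above could change, not part of the diff; note the unindexed resolver currently consults its pathFilters only in FilesByMIMEType, so full parity would also need the filters applied in the other lookup paths.

```go
if s.config.Unindexed {
	// forward the exclusion visitors built above so exclusions are not
	// silently dropped when indexing is disabled
	s.resolver = fileresolver.NewFromRootedUnindexedDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
} else {
	res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
	if err != nil {
		return nil, fmt.Errorf("unable to create directory resolver: %w", err)
	}
	s.resolver = res
}
```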