From 6af0edd4ef30efa65f1f4edbea1b68f43cc11df7 Mon Sep 17 00:00:00 2001 From: RTann Date: Mon, 26 Feb 2024 17:13:12 -0800 Subject: [PATCH] all: account for language package overwrites Signed-off-by: RTann --- gobin/coalescer.go | 21 +++- java/coalescer.go | 42 -------- java/ecosystem.go | 7 +- {nodejs => language}/coalescer.go | 28 +++++- language/coalescer_test.go | 155 ++++++++++++++++++++++++++++++ nodejs/coalescer_test.go | 71 -------------- nodejs/ecosystem.go | 3 +- python/coalescer.go | 47 --------- python/ecosystem.go | 3 +- ruby/coalescer.go | 46 --------- ruby/ecosystem.go | 3 +- 11 files changed, 207 insertions(+), 219 deletions(-) delete mode 100644 java/coalescer.go rename {nodejs => language}/coalescer.go (51%) create mode 100644 language/coalescer_test.go delete mode 100644 nodejs/coalescer_test.go delete mode 100644 python/coalescer.go delete mode 100644 ruby/coalescer.go diff --git a/gobin/coalescer.go b/gobin/coalescer.go index 5686ec38e..b3952b348 100644 --- a/gobin/coalescer.go +++ b/gobin/coalescer.go @@ -16,7 +16,13 @@ func (c *coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) Packages: map[string]*claircore.Package{}, Repositories: map[string]*claircore.Repository{}, } - for _, l := range ls { + // Similar to ir.Packages, except instead of mapping + // id -> package, it maps packageDB -> package. + // For language packages, it is possible the + // packageDB is overwritten. + packages := make(map[string]*claircore.Package) + for i := len(ls) - 1; i >= 0; i-- { + l := ls[i] var rid string for _, r := range l.Repos { // Magic strings copied out of the osv package. 
@@ -31,6 +37,19 @@ func (c *coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) if !strings.HasPrefix(pkg.PackageDB, "go:") { continue } + if childPkg, exists := packages[pkg.PackageDB]; exists { + // If the package was renamed or has a different version in a higher layer, + // then we consider this a different package and ignore the + // original in the lower layer. + if pkg.Name != childPkg.Name || pkg.Version != childPkg.Version { + continue + } + // The name and version are the same, so delete the entry related to the higher + // layer, as this package was likely introduced in the lower layer. + delete(ir.Packages, childPkg.ID) + delete(ir.Environments, childPkg.ID) + } + packages[pkg.PackageDB] = pkg ir.Packages[pkg.ID] = pkg ir.Environments[pkg.ID] = []*claircore.Environment{ { diff --git a/java/coalescer.go b/java/coalescer.go deleted file mode 100644 index 688f86280..000000000 --- a/java/coalescer.go +++ /dev/null @@ -1,42 +0,0 @@ -package java - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -type coalescer struct{} - -func (*coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one maven repo in this layer - // no point in searching for packages. 
- if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/java/ecosystem.go b/java/ecosystem.go index 1cca07347..894ab920c 100644 --- a/java/ecosystem.go +++ b/java/ecosystem.go @@ -4,10 +4,11 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) // NewEcosystem provides the set of scanners for the java ecosystem. -func NewEcosystem(ctx context.Context) *indexer.Ecosystem { +func NewEcosystem(_ context.Context) *indexer.Ecosystem { return &indexer.Ecosystem{ PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return []indexer.PackageScanner{&Scanner{}}, nil @@ -16,8 +17,6 @@ func NewEcosystem(ctx context.Context) *indexer.Ecosystem { RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: func(_ context.Context) (indexer.Coalescer, error) { - return (*coalescer)(nil), nil - }, + Coalescer: language.NewCoalescer, } } diff --git a/nodejs/coalescer.go b/language/coalescer.go similarity index 51% rename from nodejs/coalescer.go rename to language/coalescer.go index 469cc7446..8c1b4dc3c 100644 --- a/nodejs/coalescer.go +++ b/language/coalescer.go @@ -1,9 +1,9 @@ -package nodejs +package language import ( "context" - "github.com/quay/claircore" + "github.com/quay/claircore" "github.com/quay/claircore/indexer" ) @@ -21,9 +21,14 @@ func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (* Packages: map[string]*claircore.Package{}, Repositories: map[string]*claircore.Repository{}, } - - for _, l := range ls { - // If we didn't find at least one npm repo in this layer + // Similar to ir.Packages, 
except instead of mapping + // id -> package, it maps packageDB -> package. + // For language packages, it is possible the + // packageDB is overwritten. + packages := make(map[string]*claircore.Package) + for i := len(ls) - 1; i >= 0; i-- { + l := ls[i] + // If we didn't find at least one repo in this layer // no point in searching for packages. if len(l.Repos) == 0 { continue @@ -34,6 +39,19 @@ func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (* ir.Repositories[r.ID] = r } for _, pkg := range l.Pkgs { + if childPkg, exists := packages[pkg.PackageDB]; exists { + // If the package was renamed or has a different version in a higher layer, + // then we consider this a different package and ignore the + // original in the lower layer. + if pkg.Name != childPkg.Name || pkg.Version != childPkg.Version { + continue + } + // The name and version are the same, so delete the entry related to the higher + // layer, as this package was likely introduced in the lower layer. 
+ delete(ir.Packages, childPkg.ID) + delete(ir.Environments, childPkg.ID) + } + packages[pkg.PackageDB] = pkg ir.Packages[pkg.ID] = pkg ir.Environments[pkg.ID] = []*claircore.Environment{ { diff --git a/language/coalescer_test.go b/language/coalescer_test.go new file mode 100644 index 000000000..b58508ce4 --- /dev/null +++ b/language/coalescer_test.go @@ -0,0 +1,155 @@ +package language + +import ( + "context" + "strconv" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" + "github.com/quay/claircore/test" +) + +func TestCoalescer(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + coalescer := &coalescer{} + pkgs := test.GenUniquePackages(6) + repo := []*claircore.Repository{{ + Name: "npm", + URI: "https://www.npmjs.com/", + }} + layerArtifacts := []*indexer.LayerArtifacts{ + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:1], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:2], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:3], + Repos: repo, + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:4], + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs[:5], + Repos: repo, + }, + { + Hash: test.RandomSHA256Digest(t), + Pkgs: pkgs, + }, + } + ir, err := coalescer.Coalesce(ctx, layerArtifacts) + if err != nil { + t.Fatalf("received error from coalesce method: %v", err) + } + // Expect 0-5 to have gotten associated with the repository. + for i := range pkgs { + es, ok := ir.Environments[strconv.Itoa(i)] + if !ok && i == 5 { + // Left out the last package. 
+ continue + } + e := es[0] + if len(e.RepositoryIDs) == 0 { + t.Error("expected some repositories") + } + for _, id := range e.RepositoryIDs { + r := ir.Repositories[id] + if got, want := r.Name, "npm"; got != want { + t.Errorf("got: %q, want: %q", got, want) + } + } + } +} + +func TestCoalescer_package_overwrite(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + coalescer := &coalescer{} + repo := []*claircore.Repository{{ + Name: "npm", + URI: "https://www.npmjs.com/", + }} + hashes := []claircore.Digest{ + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + test.RandomSHA256Digest(t), + } + layerArtifacts := []*indexer.LayerArtifacts{ + { + Hash: hashes[0], + Pkgs: []*claircore.Package{ + { + ID: "0", + Name: "semver", + Version: "7.3.8", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + { + Hash: hashes[1], + }, + { + Hash: hashes[2], + Pkgs: []*claircore.Package{ + { + ID: "1", + Name: "semver", + Version: "7.5.2", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + { + Hash: hashes[3], + Pkgs: []*claircore.Package{ + { + ID: "2", + Name: "semver", + Version: "7.5.2", + PackageDB: "nodejs:usr/local/lib/node_modules/npm/node_modules/semver/package.json", + }, + }, + Repos: repo, + }, + } + ir, err := coalescer.Coalesce(ctx, layerArtifacts) + if err != nil { + t.Fatalf("received error from coalesce method: %v", err) + } + if len(ir.Packages) != 1 { + t.Fatalf("unexpected number of packages: %d != %d", len(ir.Packages), 1) + } + pkg, exists := ir.Packages["1"] + if !exists { + t.Fatal("expected package does not exist") + } + if pkg.Version != "7.5.2" { + t.Fatalf("unexpected version: %s != %s", pkg.Version, "7.5.2") + } + envs, exists := ir.Environments["1"] + if !exists { + t.Fatal("expected environments do not exist") + } + if len(envs) != 1 { + 
t.Fatalf("unexpected number of envionments: %d != %d", len(envs), 1) + } + if envs[0].IntroducedIn.String() != hashes[2].String() { + t.Fatalf("unexpected introducedIn: %s != %s", envs[0].IntroducedIn.String(), hashes[2].String()) + } +} diff --git a/nodejs/coalescer_test.go b/nodejs/coalescer_test.go deleted file mode 100644 index dba365f03..000000000 --- a/nodejs/coalescer_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package nodejs - -import ( - "context" - "strconv" - "testing" - - "github.com/quay/zlog" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" - "github.com/quay/claircore/test" -) - -func TestCoalescer(t *testing.T) { - t.Parallel() - ctx := zlog.Test(context.Background(), t) - coalescer := &coalescer{} - pkgs := test.GenUniquePackages(6) - repo := []*claircore.Repository{&Repository} - layerArtifacts := []*indexer.LayerArtifacts{ - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:1], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:2], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:3], - Repos: repo, - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:4], - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs[:5], - Repos: repo, - }, - { - Hash: test.RandomSHA256Digest(t), - Pkgs: pkgs, - }, - } - ir, err := coalescer.Coalesce(ctx, layerArtifacts) - if err != nil { - t.Fatalf("received error from coalesce method: %v", err) - } - // Expect 0-5 to have gotten associated with the repository. - for i := range pkgs { - es, ok := ir.Environments[strconv.Itoa(i)] - if !ok && i == 5 { - // Left out the last package. 
- continue - } - e := es[0] - if len(e.RepositoryIDs) == 0 { - t.Error("expected some repositories") - } - for _, id := range e.RepositoryIDs { - r := ir.Repositories[id] - if got, want := r.Name, Repository.Name; got != want { - t.Errorf("got: %q, want: %q", got, want) - } - } - } -} diff --git a/nodejs/ecosystem.go b/nodejs/ecosystem.go index de6f84c83..3f958919d 100644 --- a/nodejs/ecosystem.go +++ b/nodejs/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } } diff --git a/python/coalescer.go b/python/coalescer.go deleted file mode 100644 index d0d713881..000000000 --- a/python/coalescer.go +++ /dev/null @@ -1,47 +0,0 @@ -package python - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { - return &coalescer{}, nil -} - -type coalescer struct { -} - -func (c *coalescer) Coalesce(ctx context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one pip repo in this layer - // no point in searching for packages. 
- if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - &claircore.Environment{ - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/python/ecosystem.go b/python/ecosystem.go index 7458c9edd..9cd3b2a00 100644 --- a/python/ecosystem.go +++ b/python/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(ctx context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } } diff --git a/ruby/coalescer.go b/ruby/coalescer.go deleted file mode 100644 index cb0d33ef1..000000000 --- a/ruby/coalescer.go +++ /dev/null @@ -1,46 +0,0 @@ -package ruby - -import ( - "context" - - "github.com/quay/claircore" - "github.com/quay/claircore/indexer" -) - -func NewCoalescer(_ context.Context) (indexer.Coalescer, error) { - return &coalescer{}, nil -} - -type coalescer struct{} - -func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) { - ir := &claircore.IndexReport{ - Environments: map[string][]*claircore.Environment{}, - Packages: map[string]*claircore.Package{}, - Repositories: map[string]*claircore.Repository{}, - } - - for _, l := range ls { - // If we didn't find at least one gem repo in this layer - // no point in searching for packages. 
- if len(l.Repos) == 0 { - continue - } - rs := make([]string, len(l.Repos)) - for i, r := range l.Repos { - rs[i] = r.ID - ir.Repositories[r.ID] = r - } - for _, pkg := range l.Pkgs { - ir.Packages[pkg.ID] = pkg - ir.Environments[pkg.ID] = []*claircore.Environment{ - { - PackageDB: pkg.PackageDB, - IntroducedIn: l.Hash, - RepositoryIDs: rs, - }, - } - } - } - return ir, nil -} diff --git a/ruby/ecosystem.go b/ruby/ecosystem.go index 75f7208cd..985ecdb01 100644 --- a/ruby/ecosystem.go +++ b/ruby/ecosystem.go @@ -4,6 +4,7 @@ import ( "context" "github.com/quay/claircore/indexer" + "github.com/quay/claircore/language" ) var scanners = []indexer.PackageScanner{&Scanner{}} @@ -14,6 +15,6 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { PackageScanners: func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil }, DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil }, RepositoryScanners: func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil }, - Coalescer: NewCoalescer, + Coalescer: language.NewCoalescer, } }