Skip the legacy default API version when scraping.

The scraper now skips version 2021-01-01, which the added code comment
describes as a default version that predates vervet's creation date.
The "animals" test fixture gains that version, and TestScraperWithLegacy
asserts that it is not reported as stored after a scrape.
config.local.json is added to .gitignore.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. The position of the blank context line in
the .gitignore hunk and the exact tab depth of context lines are assumptions;
confirm with `git apply --check` before applying.

diff --git a/.gitignore b/.gitignore
index 1256344d..43b406ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,6 @@
 
 **/server
 config.json
+config.local.json
 config.*.test.json
 result
diff --git a/vervet-underground/internal/scraper/scraper.go b/vervet-underground/internal/scraper/scraper.go
index e3d529ed..2cd13122 100644
--- a/vervet-underground/internal/scraper/scraper.go
+++ b/vervet-underground/internal/scraper/scraper.go
@@ -155,6 +155,12 @@ func (s *Scraper) scrape(ctx context.Context, scrapeTime time.Time, svc service)
 	for i := range versions {
 		// TODO: we might run this concurrently per live service pod if/when
 		// we're more k8s aware, but we won't do that yet.
+
+		// Skip if it's a legacy api using the default legacy version.
+		if isLegacyVersion(versions[i]) {
+			continue
+		}
+
 		contents, isNew, err := s.getNewVersion(ctx, svc, versions[i])
 		if err != nil {
 			return errors.WithStack(err)
@@ -275,6 +281,12 @@ func (s *Scraper) hasNewVersion(ctx context.Context, svc service, version string
 	return s.storage.HasVersion(ctx, svc.name, version, digest)
 }
 
+// isLegacyVersion is used to identify legacy APIs which should be excluded.
+func isLegacyVersion(version string) bool {
+	// This default version predates vervet's creation date.
+	return version == "2021-01-01"
+}
+
 func (s *Scraper) Versions() vervet.VersionSlice {
 	return s.storage.Versions()
 }
diff --git a/vervet-underground/internal/scraper/scraper_test.go b/vervet-underground/internal/scraper/scraper_test.go
index 5e43961f..3d1b42f1 100644
--- a/vervet-underground/internal/scraper/scraper_test.go
+++ b/vervet-underground/internal/scraper/scraper_test.go
@@ -39,8 +39,9 @@ var (
 		},
 	}
 	animals = &testService{
-		versions: []string{"2021-10-01", "2021-10-16"},
+		versions: []string{"2021-01-01", "2021-10-01", "2021-10-16"},
 		contents: map[string]string{
+			"2021-01-01": `{"paths":{"/legacy": {}}}`,
 			"2021-10-01": `{"paths":{"/geckos": {}}}`,
 			"2021-10-16": `{"paths":{"/geckos": {}, "/puppies": {}}}`,
 		},
@@ -136,6 +137,43 @@ func TestScraper(t *testing.T) {
 	}
 }
 
+func TestScraperWithLegacy(t *testing.T) {
+	c := qt.New(t)
+
+	_, animalsService := setupHttpServers(c)
+	tests := []struct {
+		name, version, digest string
+	}{
+		{"animals", "2021-01-01", "sha256:XX2f9c3iySLCw54rJ/CZs+ZK6IQy7GXNY4nSOyu2QG4="},
+	}
+
+	cfg := &config.ServerConfig{
+		Services: []config.ServiceConfig{
+			{
+				Name: "animals", URL: animalsService.URL,
+			},
+		},
+	}
+	st := mem.New()
+	sc, err := scraper.New(cfg, st, scraper.Clock(func() time.Time { return t0 }))
+	c.Assert(err, qt.IsNil)
+
+	// Cancel the scrape context after a timeout so we don't hang the test
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
+	c.Cleanup(cancel)
+
+	// Run the scrape
+	err = sc.Run(ctx)
+	c.Assert(err, qt.IsNil)
+
+	// Legacy (default) version should not be stored
+	for _, test := range tests {
+		ok, err := st.HasVersion(ctx, test.name, test.version, test.digest)
+		c.Assert(err, qt.IsNil)
+		c.Assert(ok, qt.IsFalse)
+	}
+}
+
 func TestEmptyScrape(t *testing.T) {
 	c := qt.New(t)
 	cfg := &config.ServerConfig{