Skip to content

Commit

Permalink
feat: use publiccode-parser-go v4
Browse files (browse the repository at this point in the history)
  • Loading branch information
bfabio committed Jun 4, 2024
1 parent c0b5880 commit 4002428
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 22 deletions.
42 changes: 23 additions & 19 deletions crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"github.com/italia/publiccode-crawler/v4/git"
"github.com/italia/publiccode-crawler/v4/metrics"
"github.com/italia/publiccode-crawler/v4/scanner"
publiccode "github.com/italia/publiccode-parser-go/v3"
publiccode "github.com/italia/publiccode-parser-go/v4"
log "github.com/sirupsen/logrus"
"github.com/spf13/viper"
"golang.org/x/exp/slices"
Expand Down Expand Up @@ -327,15 +327,6 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx
),
)

var parser *publiccode.Parser
parser, err = publiccode.NewParser(repository.FileRawURL)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] BAD publiccode.yml: %s\n", repository.Name, err.Error()))
metrics.GetCounter("repository_bad_publiccodeyml", c.Index).Inc()

return
}

//nolint:godox
// FIXME: this is hardcoded for now, because it requires changes to publiccode-parser-go.
domain := publiccode.Domain{
Expand All @@ -344,9 +335,22 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx
BasicAuth: []string{os.Getenv("GITHUB_TOKEN")},
}

var parser *publiccode.Parser
parser, err = publiccode.NewParser(publiccode.ParserConfig{Domain: domain})
if err != nil {
logEntries = append(
logEntries,
fmt.Sprintf("[%s] can't create a Parser: %s\n", repository.Name, err.Error()),
)

return
}

var parsed publiccode.PublicCode
parsed, err = parser.Parse(repository.FileRawURL)

valid := true

err = parser.ParseInDomain(resp.Body, domain.Host, domain.UseTokenFor, domain.BasicAuth)
if err != nil {
var validationResults publiccode.ValidationResults
if errors.As(err, &validationResults) {
Expand All @@ -363,7 +367,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx

publisherID := viper.GetString("MAIN_PUBLISHER_ID")
if valid && repository.Publisher.ID != publisherID {
err = validateFile(repository.Publisher, *parser, repository.FileRawURL)
err = validateFile(repository.Publisher, parsed.(publiccode.PublicCodeV0), repository.FileRawURL)

Check failure on line 370 in crawler/crawler.go (GitHub Actions / go-lint, reported twice; view the workflow job for this annotation):

type assertion must be checked (forcetypeassert)
if err != nil {
valid = false
}
Expand Down Expand Up @@ -391,7 +395,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx
aliases = append(aliases, repository.URL.String())
}

publiccodeYml, err := parser.ToYAML()
publiccodeYml, err := parsed.ToYAML()
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] parsing error: %s", repository.Name, err.Error()))

Expand Down Expand Up @@ -433,7 +437,7 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx

if !viper.GetBool("SKIP_VITALITY") && !c.DryRun {
// Clone repository.
err = git.CloneRepository(repository.URL.Host, repository.Name, parser.PublicCode.URL.String(), c.Index)
err = git.CloneRepository(repository.URL.Host, repository.Name, parsed.Url().String(), c.Index)
if err != nil {
logEntries = append(logEntries, fmt.Sprintf("[%s] error while cloning: %v\n", repository.Name, err))
}
Expand All @@ -460,11 +464,11 @@ func (c *Crawler) ProcessRepo(repository common.Repository) { //nolint:maintidx
// validateFile performs additional validations that are not strictly mandated
// by the publiccode.yml Standard.
// Using `one` command this check will be skipped.
func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawURL string) error {
func validateFile(publisher common.Publisher, parsed publiccode.PublicCodeV0, fileRawURL string) error {
u, _ := url.Parse(fileRawURL)
repo1 := vcsurl.GetRepo(u)

repo2 := vcsurl.GetRepo((*url.URL)(parser.PublicCode.URL))
repo2 := vcsurl.GetRepo((*url.URL)(parsed.Url()))

if repo1 != nil && repo2 != nil {
// Let's ignore the schema when checking for equality.
Expand All @@ -477,7 +481,7 @@ func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawU
return fmt.Errorf(
"declared url (%s) and actual publiccode.yml location URL (%s) "+
"are not in the same repo: '%s' vs '%s'",
parser.PublicCode.URL, fileRawURL, repo2, repo1,
parsed.Url(), fileRawURL, repo2, repo1,
)
}
}
Expand All @@ -494,11 +498,11 @@ func validateFile(publisher common.Publisher, parser publiccode.Parser, fileRawU

if !idIsUUID && !strings.EqualFold(
strings.TrimSpace(publisher.ID),
strings.TrimSpace(parser.PublicCode.It.Riuso.CodiceIPA),
strings.TrimSpace(parsed.It.Riuso.CodiceIPA),
) {
return fmt.Errorf(
"codiceIPA is '%s', but '%s' was expected for '%s' in %s",
parser.PublicCode.It.Riuso.CodiceIPA,
parsed.It.Riuso.CodiceIPA,
publisher.ID,
publisher.Name,
fileRawURL,
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/google/go-github/v43 v43.0.0
github.com/hashicorp/go-retryablehttp v0.7.1
github.com/italia/httpclient-lib-go v0.0.2
github.com/italia/publiccode-parser-go/v3 v3.1.3
github.com/italia/publiccode-parser-go/v4 v4.0.0
github.com/ktrysmt/go-bitbucket v0.9.63
github.com/prometheus/client_golang v1.11.1
github.com/prometheus/client_model v0.3.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -914,8 +914,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/italia/httpclient-lib-go v0.0.2 h1:4bJLywTVd7qHPdKxJXvvhlXp436JTC4KA6dLhIl5a6c=
github.com/italia/httpclient-lib-go v0.0.2/go.mod h1:b0/D3ULsBw8X+zEl7j/kSZmiMlUdj+agppneOvSq6eA=
github.com/italia/publiccode-parser-go/v3 v3.1.3 h1:o4x0K6dJgBUh1Tac99AJEyM6uNV+e1GEJ7F5r+uMNMI=
github.com/italia/publiccode-parser-go/v3 v3.1.3/go.mod h1:TvC+rGxBbIE+riQyey4GFtyC7GgKKTGmtsHINIp4aQ8=
github.com/italia/publiccode-parser-go/v4 v4.0.0 h1:1/q4VH7WtnkaJyq3Cn0HU0KUz9X6uRPzmxNariKuI3w=
github.com/italia/publiccode-parser-go/v4 v4.0.0/go.mod h1:qmxP/BgLwCeNMjfcXbRz8WCMPn85Pekcy+oGTUmEF4U=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
Expand Down

0 comments on commit 4002428

Please sign in to comment.