Skip to content

Commit

Permalink
encoding/jsonschema: fetch the test suite GitHub repo via a zip
Browse files Browse the repository at this point in the history
Rather than `git clone` followed by `git checkout`, which is slower
and requires the VCS tool to be installed, we can directly download
a ZIP archive from GitHub via the documented API:

    https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#download-a-repository-archive-zip

Note that this API is specific to GitHub, but practically every
VCS hosting service provides an extremely similar API.

As a bonus, fetching a ZIP means we can open it as an io/fs.FS directly
as long as we hold it all in memory, which is easy at the current size
of about 470KiB.

The runtime of this `go generate` step drops from about 1.5s
to about 1.2s on my laptop. As measured by the logger, which now includes
precise timestamps, fetching the archive takes about 250ms,
which is comparable to Go compilation at ~200ms and loading CUE at ~500ms.

While here, I noticed an unchecked error and a confusing error return
at the end of the function, and fixed both. Add a log line at the end too,
to measure how long the entire program took to do its job, and use the
filename as the log prefix, so that the source is obvious in `go generate`.

I verified that it all works as expected by deleting the files
and re-generating and updating them from scratch.
The only changes are to stale skip strings, which Roger is fixing
in a parallel chain of CLs.

Signed-off-by: Daniel Martí <[email protected]>
Change-Id: Ia780ade3ca1ce27ef755a026f7545b173dafb17e
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1200925
TryBot-Result: CUEcueckoo <[email protected]>
Reviewed-by: Roger Peppe <[email protected]>
  • Loading branch information
mvdan committed Sep 10, 2024
1 parent 0aeef9f commit 7a3260c
Showing 1 changed file with 32 additions and 25 deletions.
57 changes: 32 additions & 25 deletions encoding/jsonschema/vendor_external.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@
package main

import (
"archive/zip"
"bytes"
"errors"
"flag"
"fmt"
"io"
"io/fs"
"log"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
Expand All @@ -45,6 +48,7 @@ func main() {
fmt.Fprintf(os.Stderr, "usage: vendor-external commit\n")
os.Exit(2)
}
log.SetFlags(log.Lshortfile | log.Ltime | log.Lmicroseconds)
flag.Parse()
if flag.NArg() != 1 {
flag.Usage()
Expand All @@ -55,31 +59,42 @@ func main() {
}

func doVendor(commit string) error {
tmpDir, err := os.MkdirTemp("", "")
// Fetch a commit from GitHub via their archive ZIP endpoint, which is a lot faster
// than git cloning just to retrieve a single commit's files.
// See: https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#download-a-repository-archive-zip
zipURL := fmt.Sprintf("https://github.com/json-schema-org/JSON-Schema-Test-Suite/archive/%s.zip", commit)
log.Printf("fetching %s", zipURL)
resp, err := http.Get(zipURL)
if err != nil {
return err
}
defer os.RemoveAll(tmpDir)
logf("cloning %s", testRepo)
if err := runCmd(tmpDir, "git", "clone", "-q", testRepo, "."); err != nil {
return err
}
logf("checking out commit %s", commit)
if err := runCmd(tmpDir, "git", "checkout", "-q", commit); err != nil {
defer resp.Body.Close()
zipBytes, err := io.ReadAll(resp.Body)
if err != nil {
return err
}
logf("reading old test data")

log.Printf("reading old test data")
oldTests, err := externaltest.ReadTestDir(testDir)
if err != nil && !errors.Is(err, externaltest.ErrNotFound) {
return err
}
logf("copying files to %s", testDir)

log.Printf("copying files to %s", testDir)
testSubdir := filepath.Join(testDir, "tests")
if err := os.RemoveAll(testSubdir); err != nil {
return err
}
fsys := os.DirFS(filepath.Join(tmpDir, "tests"))
zipr, err := zip.NewReader(bytes.NewReader(zipBytes), int64(len(zipBytes)))
if err != nil {
return err
}
// Note that GitHub produces archives with a top-level directory representing
// the name of the repository and the version which was retrieved.
fsys, err := fs.Sub(zipr, fmt.Sprintf("JSON-Schema-Test-Suite-%s/tests", commit))
if err != nil {
return err
}
err = fs.WalkDir(fsys, ".", func(filename string, d fs.DirEntry, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -107,6 +122,9 @@ func doVendor(commit string) error {
}
return nil
})
if err != nil {
return err
}

// Read the test data back that we've just written and attempt
// to populate skip data from the original test data.
Expand Down Expand Up @@ -152,23 +170,12 @@ func doVendor(commit string) error {
if err := externaltest.WriteTestDir(testDir, newTests); err != nil {
return err
}
return err
log.Printf("finished")
return nil
}

// skipKey is a comparable composite key made of three strings:
// a file name, a schema identifier and a test identifier.
type skipKey struct {
	filename, schema, test string
}

// runCmd executes the program name with the given args, using dir as
// the working directory. The child's standard output and standard error
// are connected to this process's own streams. It returns whatever
// error the command's Run method reports.
func runCmd(dir string, name string, args ...string) error {
	cmd := exec.Command(name, args...)
	cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
	cmd.Dir = dir
	return cmd.Run()
}

// logf formats a according to the format string f and writes the
// result to standard error, followed by a newline.
func logf(f string, a ...any) {
	msg := fmt.Sprintf(f, a...)
	fmt.Fprintln(os.Stderr, msg)
}

0 comments on commit 7a3260c

Please sign in to comment.