From 27b56d38d6077700b14323dbc16230a63b0c6467 Mon Sep 17 00:00:00 2001 From: Jonathan Poole Date: Wed, 15 Nov 2023 12:42:08 +0000 Subject: [PATCH] Implement an iofs.FS over the REAPI Tree proto (#2955) * Implement an iofs.FS over the REAPI Tree proto * lint * Refactor a bit and add a FindNode which will be useful when preparing sources * Update some comments * log.Fatalf not panic * lint * Fix . in path * Add another test to verify . in the middle of a path works * Ah the size is on the digest * Add test to see if we get the size back for the file * Handle nil node properties in ReadDir --- .plzconfig.localremote | 15 +++ please-servers-token.txt | 1 + src/remote/fs/BUILD | 27 +++++ src/remote/fs/fs.go | 243 +++++++++++++++++++++++++++++++++++++++ src/remote/fs/fs_test.go | 241 ++++++++++++++++++++++++++++++++++++++ src/remote/fs/info.go | 67 +++++++++++ 6 files changed, 594 insertions(+) create mode 100644 .plzconfig.localremote create mode 100644 please-servers-token.txt create mode 100644 src/remote/fs/BUILD create mode 100644 src/remote/fs/fs.go create mode 100644 src/remote/fs/fs_test.go create mode 100644 src/remote/fs/info.go diff --git a/.plzconfig.localremote b/.plzconfig.localremote new file mode 100644 index 0000000000..387114c99d --- /dev/null +++ b/.plzconfig.localremote @@ -0,0 +1,15 @@ +# Steps to build with remote execution locally: +# 1) git clone https://github.com/thought-machine/please-servers +# 2) cd please-servers && plz localremote +# 3) you can then build and run with --profile localremote in this repo + +[Remote] +URL = 127.0.0.1:7772 +CasUrl = 127.0.0.1:7777 +AssetUrl = 127.0.0.1:7776 +NumExecutors = 20 +# This file should be kept up to date with the file in grpcutil/token.txt from please-servers +TokenFile = please-servers-token.txt +Secure = false +DisplayUrl = http://localhost:7779 +Instance = mettle diff --git a/please-servers-token.txt b/please-servers-token.txt new file mode 100644 index 0000000000..36a332b868 --- /dev/null +++ b/please-servers-token.txt @@ -0,0 +1 @@ +ovES3eR7-nBs5pgCpyrfY0kzepyrKK7w diff --git a/src/remote/fs/BUILD b/src/remote/fs/BUILD new file mode 100644 index 0000000000..f26be4ef3c --- /dev/null +++ b/src/remote/fs/BUILD @@ -0,0 +1,27 @@ +go_library( + name = "fs", + srcs = [ + "fs.go", + "info.go", + ], + deps = [ + "///third_party/go/github.com_bazelbuild_remote-apis-sdks//go/pkg/client", + "///third_party/go/github.com_bazelbuild_remote-apis-sdks//go/pkg/digest", + "///third_party/go/github.com_bazelbuild_remote-apis//build/bazel/remote/execution/v2", + "//src/cli/logging", + ], +) + +go_test( + name = "fs_test", + srcs = ["fs_test.go"], + deps = [ + ":fs", + "///third_party/go/github.com_bazelbuild_remote-apis-sdks//go/pkg/client", + "///third_party/go/github.com_bazelbuild_remote-apis-sdks//go/pkg/digest", + "///third_party/go/github.com_bazelbuild_remote-apis//build/bazel/remote/execution/v2", + "///third_party/go/github.com_golang_protobuf//ptypes/wrappers", + "///third_party/go/github.com_stretchr_testify//assert", + "///third_party/go/github.com_stretchr_testify//require", + ], +) diff --git a/src/remote/fs/fs.go b/src/remote/fs/fs.go new file mode 100644 index 0000000000..1f6f866146 --- /dev/null +++ b/src/remote/fs/fs.go @@ -0,0 +1,243 @@ +// Package fs provides an io/fs.FS implementation over the remote execution API content addressable store (CAS) +package fs + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + iofs "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/bazelbuild/remote-apis-sdks/go/pkg/client" + "github.com/bazelbuild/remote-apis-sdks/go/pkg/digest" + pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + + "github.com/thought-machine/please/src/cli/logging" +) + +var log = logging.Log + +// Client is an interface to the REAPI CAS +type Client interface { + ReadBlob(ctx context.Context, d digest.Digest) ([]byte, *client.MovedBytesMetadata, error) +} + +// CASFileSystem is an fs.FS implemented on top of a Tree proto. This will download files as they are needed from the +// CAS when they are opened. +type CASFileSystem struct { + c Client + root *pb.Directory + directories map[digest.Digest]*pb.Directory + workingDir string +} + +// New creates a new filesystem on top of the given proto, using client to download files from the CAS on demand. +func New(c Client, tree *pb.Tree, workingDir string) *CASFileSystem { + directories := make(map[digest.Digest]*pb.Directory, len(tree.Children)) + for _, child := range append(tree.Children, tree.Root) { + dg, err := digest.NewFromMessage(child) + if err != nil { + log.Fatalf("Failed to create CASFileSystem: failed to calculate digest: %v", err) + } + directories[dg] = child + } + + return &CASFileSystem{ + c: c, + root: tree.Root, + directories: directories, + workingDir: filepath.Clean(workingDir), + } +} + +// Open opens the file with the given name +func (fs *CASFileSystem) Open(name string) (iofs.File, error) { + return fs.open(filepath.Join(fs.workingDir, name)) +} + +// FindNode returns the node proto for the given name. Either FileNode, DirectoryNode or SymlinkNode will be set, or an +// error will be returned. The error will be os.ErrNotExist if the path doesn't exist. +func (fs *CASFileSystem) FindNode(name string) (*pb.FileNode, *pb.DirectoryNode, *pb.SymlinkNode, error) { + return fs.findNode(fs.root, filepath.Join(fs.workingDir, name)) +} + +func (fs *CASFileSystem) open(name string) (iofs.File, error) { + fileNode, dirNode, linkNode, err := fs.findNode(fs.root, name) + if err != nil { + return nil, err + } + + if linkNode != nil { + if filepath.IsAbs(linkNode.Target) { + return nil, fmt.Errorf("%v: symlink target was absolute which is invalid", name) + } + return fs.open(filepath.Join(filepath.Dir(name), linkNode.Target)) + } + + if fileNode != nil { + return fs.openFile(fileNode) + } + if dirNode != nil { + return fs.openDir(dirNode) + } + return nil, os.ErrNotExist +} + +// openFile downloads a file from the CAS and returns it as an iofs.File +func (fs *CASFileSystem) openFile(f *pb.FileNode) (*file, error) { + bs, _, err := fs.c.ReadBlob(context.Background(), digest.NewFromProtoUnvalidated(f.Digest)) + if err != nil { + return nil, err + } + + i := info{ + size: int64(len(bs)), + name: f.Name, + } + + return &file{ + ReadSeeker: bytes.NewReader(bs), + info: i.withProperties(f.NodeProperties), + }, nil +} + +func (fs *CASFileSystem) openDir(d *pb.DirectoryNode) (iofs.File, error) { + dirPb := fs.directories[digest.NewFromProtoUnvalidated(d.Digest)] + i := &info{ + name: d.Name, + isDir: true, + } + return &dir{ + info: i.withProperties(dirPb.NodeProperties), + pb: dirPb, + children: fs.directories, + }, nil +} + +func (fs *CASFileSystem) findNode(wd *pb.Directory, name string) (*pb.FileNode, *pb.DirectoryNode, *pb.SymlinkNode, error) { + // When the path contains a /, we only want to match name as a directory. This is because if we have foo/bar, and we + // matched foo as a file, we still need to descend further, which we can't do if it's a file or symlink. + name, rest, hasToBeDir := strings.Cut(name, string(filepath.Separator)) + + if name == "." { + if rest != "" { + return fs.findNode(wd, rest) + } + dg, err := digest.NewFromMessage(wd) + if err != nil { + return nil, nil, nil, err + } + node := &pb.DirectoryNode{Name: ".", Digest: dg.ToProto()} + return nil, node, nil, nil + } + + // Must be a dodgy symlink that goes past our tree. + if name == ".." { + return nil, nil, nil, os.ErrNotExist + } + + for _, d := range wd.Directories { + if d.Name == name { + dirPb := fs.directories[digest.NewFromProtoUnvalidated(d.Digest)] + if rest == "" { + return nil, d, nil, nil + } + return fs.findNode(dirPb, rest) + } + } + + if hasToBeDir { + return nil, nil, nil, os.ErrNotExist + } + + for _, f := range wd.Files { + if f.Name == name { + return f, nil, nil, nil + } + } + + for _, l := range wd.Symlinks { + if l.Name == name { + return nil, nil, l, nil + } + } + return nil, nil, nil, os.ErrNotExist +} + +type file struct { + io.ReadSeeker + *info +} + +func (b *file) Stat() (iofs.FileInfo, error) { + return b, nil +} +func (b *file) Close() error { + return nil +} + +type dir struct { + pb *pb.Directory + children map[digest.Digest]*pb.Directory + *info +} + +// ReadDir implements listing the contents of a directory stored in the CAS. This is entirely based off the original +// data from the Tree proto so doesn't do any additional fetching. +func (p *dir) ReadDir(n int) ([]iofs.DirEntry, error) { + dirSize := n + if n <= 0 { + dirSize = len(p.pb.Files) + len(p.pb.Symlinks) + len(p.pb.Files) + } + ret := make([]iofs.DirEntry, 0, dirSize) + for _, dirNode := range p.pb.Directories { + if n > 0 && len(ret) == n { + return ret, nil + } + dir := p.children[digest.NewFromProtoUnvalidated(dirNode.Digest)] + i := &info{ + name: dirNode.Name, + isDir: true, + typeMode: os.ModeDir, + } + + ret = append(ret, i.withProperties(dir.NodeProperties)) + } + for _, file := range p.pb.Files { + if n > 0 && len(ret) == n { + return ret, nil + } + i := &info{ + name: file.Name, + size: file.Digest.SizeBytes, + } + ret = append(ret, i.withProperties(file.NodeProperties)) + } + for _, link := range p.pb.Symlinks { + if n > 0 && len(ret) == n { + return ret, nil + } + i := &info{ + name: link.Name, + typeMode: os.ModeSymlink, + } + ret = append(ret, i.withProperties(link.NodeProperties)) + } + return ret, nil +} + +func (p *dir) Stat() (iofs.FileInfo, error) { + return p, nil +} + +func (p *dir) Read(_ []byte) (int, error) { + return 0, errors.New("attempt to read a directory") +} + +func (p *dir) Close() error { + return nil +} diff --git a/src/remote/fs/fs_test.go b/src/remote/fs/fs_test.go new file mode 100644 index 0000000000..25d07593ed --- /dev/null +++ b/src/remote/fs/fs_test.go @@ -0,0 +1,241 @@ +package fs + +import ( + "context" + iofs "io/fs" + "testing" + + "github.com/bazelbuild/remote-apis-sdks/go/pkg/client" + "github.com/bazelbuild/remote-apis-sdks/go/pkg/digest" + pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" + "github.com/golang/protobuf/ptypes/wrappers" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var fooContent = "wibble wibble wibble" + +type fakeClient struct { + results map[digest.Digest][]byte +} + +func (f *fakeClient) ReadBlob(_ context.Context, d digest.Digest) ([]byte, *client.MovedBytesMetadata, error) { + res := f.results[d] + return res, nil, nil +} + +func newDigest(str string) digest.Digest { + return digest.NewFromBlob([]byte(str)) +} + +// getTree returns a pb.Tree proto representing the following dir structure: +// . (root) +// |- foo (file containing wibble wibble wibble) +// |- bar +// +// |- empty (an empty directory) +// |- foo (same file as above) +// |- example.go (not in CAS) +// |- example_test.go (not in CAS) +// |- link (a symlink to ../foo i.e. foo in the root dir) +// |- badlink (a symlink to ../../foo which is root/.. i.e. invalid) +func getTree(t *testing.T) (*fakeClient, *pb.Tree) { + t.Helper() + fooDigest := newDigest(fooContent) + + foo := &pb.FileNode{ + Name: "foo", + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + Digest: fooDigest.ToProto(), + } + + empty := &pb.Directory{ + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + } + emptyDigest, err := digest.NewFromMessage(empty) + require.NoError(t, err) + + bar := &pb.Directory{ + Files: []*pb.FileNode{ + foo, + { + Name: "example.go", + Digest: newDigest("example.go").ToProto(), + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + }, + { + Name: "example_test.go", + Digest: newDigest("example_test.go").ToProto(), + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + }, + }, + Symlinks: []*pb.SymlinkNode{ + { + Name: "link", + Target: "../foo", + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + }, + { + Name: "badlink", + Target: "../../foo", + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + }, + }, + Directories: []*pb.DirectoryNode{ + { + Name: "empty", + Digest: emptyDigest.ToProto(), + }, + }, + NodeProperties: &pb.NodeProperties{UnixMode: &wrappers.UInt32Value{ + Value: 0777, + }}, + } + + barDigest, err := digest.NewFromMessage(bar) + require.NoError(t, err) + + root := &pb.Directory{ + Files: []*pb.FileNode{ + foo, + }, + Directories: []*pb.DirectoryNode{ + { + Name: "bar", + Digest: barDigest.ToProto(), + }, + }, + } + + fc := &fakeClient{ + results: map[digest.Digest][]byte{ + fooDigest: []byte(fooContent), + }, + } + tree := &pb.Tree{ + Root: root, + Children: []*pb.Directory{ + bar, + empty, + }, + } + return fc, tree +} + +func TestReadDir(t *testing.T) { + fc, tree := getTree(t) + fs := New(fc, tree, "") + + entries, err := iofs.ReadDir(fs, "bar") + require.NoError(t, err) + assert.Len(t, entries, 6) + + for _, e := range entries { + i, err := e.Info() + require.NoError(t, err) + // We set them all to 0777 above + assert.Equal(t, iofs.FileMode(0777), i.Mode(), "%v mode was wrong", e.Name()) + if e.Name() == "foo" { + assert.Equal(t, len([]byte(fooContent)), int(i.Size())) + } + } + + entries, err = iofs.ReadDir(fs, ".") + require.NoError(t, err) + require.Len(t, entries, 2) +} + +func TestGlob(t *testing.T) { + fc, tree := getTree(t) + fs := New(fc, tree, "") + + matches, err := iofs.Glob(fs, "bar/*.go") + require.NoError(t, err) + assert.Len(t, matches, 2) + assert.ElementsMatch(t, matches, []string{"bar/example.go", "bar/example_test.go"}) +} + +func TestReadFile(t *testing.T) { + fc, tree := getTree(t) + + tests := []struct { + name string + wd string + file string + expectError bool + expectedOutput string + }{ + { + name: "Open file in root", + wd: ".", + file: "foo", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open file in root with .", + wd: ".", + file: "./foo", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open file in dir", + wd: ".", + file: "bar/foo", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open file in dir with .", + wd: ".", + file: "bar/./foo", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open file with working dir", + wd: "bar", + file: "foo", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open symlink", + wd: ".", + file: "bar/link", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open symlink from working dir", + wd: "bar", + file: "link", + expectedOutput: "wibble wibble wibble", + }, + { + name: "Open bad symlink", + wd: ".", + file: "bar/badlink", + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + bs, err := iofs.ReadFile(New(fc, tree, tc.wd), tc.file) + if tc.expectError { + assert.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tc.expectedOutput, string(bs)) + }) + } +} diff --git a/src/remote/fs/info.go b/src/remote/fs/info.go new file mode 100644 index 0000000000..ce0d9f42d3 --- /dev/null +++ b/src/remote/fs/info.go @@ -0,0 +1,67 @@ +package fs + +import ( + iofs "io/fs" + "os" + "time" + + pb "github.com/bazelbuild/remote-apis/build/bazel/remote/execution/v2" +) + +// info represents information about a file/directory +type info struct { + name string + isDir bool + size int64 + modTime time.Time + mode os.FileMode + typeMode os.FileMode +} + +func (i *info) Type() iofs.FileMode { + return i.typeMode +} + +func (i *info) Info() (iofs.FileInfo, error) { + return i, nil +} + +func (i *info) Name() string { + return i.name +} + +func (i *info) Size() int64 { + return i.size +} + +func (i *info) Mode() iofs.FileMode { + return i.mode +} + +func (i *info) ModTime() time.Time { + return i.modTime +} + +func (i *info) IsDir() bool { + return i.isDir +} + +func (i *info) Sys() any { + return nil +} + +// withProperties safely sets the node info if it's available. +func (i *info) withProperties(nodeProperties *pb.NodeProperties) *info { + if nodeProperties == nil { + return i + } + + if nodeProperties.UnixMode != nil { + i.mode = os.FileMode(nodeProperties.UnixMode.Value) + } + + if nodeProperties.Mtime != nil { + i.modTime = nodeProperties.Mtime.AsTime() + } + return i +}