From f51fc9822b8bcc646d2e10cc6d3802864f95d8bd Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 5 Mar 2024 18:22:25 +0100 Subject: [PATCH 1/3] worker: add HEAD object endpoint --- api/object.go | 66 ++++++++++++++++++++++++++- internal/test/e2e/metadata_test.go | 35 ++++++++++---- worker/client/client.go | 73 ++++++++++++++++++------------ worker/serve.go | 8 ++-- worker/worker.go | 41 +++++++++++++++++ 5 files changed, 178 insertions(+), 45 deletions(-) diff --git a/api/object.go b/api/object.go index cef672a97..f53dbb5ac 100644 --- a/api/object.go +++ b/api/object.go @@ -83,7 +83,7 @@ type ( Object *Object `json:"object,omitempty"` } - // GetObjectResponse is the response type for the /worker/object endpoint. + // GetObjectResponse is the response type for the GET /worker/object endpoint. GetObjectResponse struct { Content io.ReadCloser `json:"content"` ContentType string `json:"contentType"` @@ -91,6 +91,16 @@ type ( Range *DownloadRange `json:"range,omitempty"` Size int64 `json:"size"` Metadata ObjectUserMetadata `json:"metadata"` + // NOTE: keep HeadObjectResponse in sync with this type + } + + // HeadObjectResponse is the response type for the HEAD /worker/object endpoint. + HeadObjectResponse struct { + ContentType string `json:"contentType"` + LastModified string `json:"lastModified"` + Range *DownloadRange `json:"range,omitempty"` + Size int64 `json:"size"` + Metadata ObjectUserMetadata `json:"metadata"` } // ObjectsDeleteRequest is the request type for the /bus/objects/list endpoint. @@ -135,6 +145,46 @@ type ( } ) +func ParseObjectHeadResponseFrom(header http.Header) (HeadObjectResponse, error) { + // parse size + var size int64 + _, err := fmt.Sscan(header.Get("Content-Length"), &size) + if err != nil { + return HeadObjectResponse{}, err + } + + // parse range + var r *DownloadRange + if cr := header.Get("Content-Range"); cr != "" { + dr, err := ParseDownloadRange(cr) + if err != nil { + return HeadObjectResponse{}, err + } + r = &dr + + // if a range is set, the size is the size extracted from the range + // since Content-Length will then only be the length of the returned + // range. + size = dr.Size + } + + // parse headers + headers := make(map[string]string) + for k, v := range header { + if len(v) > 0 { + headers[k] = v[0] + } + } + + return HeadObjectResponse{ + ContentType: header.Get("Content-Type"), + LastModified: header.Get("Last-Modified"), + Range: r, + Size: size, + Metadata: ExtractObjectUserMetadataFrom(headers), + }, nil +} + func ExtractObjectUserMetadataFrom(metadata map[string]string) ObjectUserMetadata { oum := make(map[string]string) for k, v := range metadata { @@ -206,6 +256,10 @@ type ( Batch bool } + HeadObjectOptions struct { + Range DownloadRange + } + DownloadObjectOptions struct { GetObjectOptions Range DownloadRange @@ -301,6 +355,16 @@ func (opts DeleteObjectOptions) Apply(values url.Values) { } } +func (opts HeadObjectOptions) ApplyHeaders(h http.Header) { + if opts.Range != (DownloadRange{}) { + if opts.Range.Length == -1 { + h.Set("Range", fmt.Sprintf("bytes=%v-", opts.Range.Offset)) + } else { + h.Set("Range", fmt.Sprintf("bytes=%v-%v", opts.Range.Offset, opts.Range.Offset+opts.Range.Length-1)) + } + } +} + func (opts GetObjectOptions) Apply(values url.Values) { if opts.Prefix != "" { values.Set("prefix", opts.Prefix) diff --git a/internal/test/e2e/metadata_test.go b/internal/test/e2e/metadata_test.go index b71078eef..d11f6ba4e 100644 --- a/internal/test/e2e/metadata_test.go +++ b/internal/test/e2e/metadata_test.go @@ -33,27 +33,42 @@ func TestObjectMetadata(t *testing.T) { } // upload the object - _, err := w.UploadObject(context.Background(), bytes.NewReader([]byte(t.Name())), api.DefaultBucketName, t.Name(), opts) + data := []byte(t.Name()) + _, err := w.UploadObject(context.Background(), bytes.NewReader(data), api.DefaultBucketName, t.Name(), opts) if err != nil { t.Fatal(err) } // get the object from the bus and assert it has the metadata - ress, err := b.Object(context.Background(), api.DefaultBucketName, t.Name(), api.GetObjectOptions{}) + or, err := b.Object(context.Background(), api.DefaultBucketName, t.Name(), api.GetObjectOptions{}) if err != nil { t.Fatal(err) } - if !reflect.DeepEqual(ress.Object.Metadata, opts.Metadata) { - t.Fatal("metadata mismatch", ress.Object.Metadata) + if !reflect.DeepEqual(or.Object.Metadata, opts.Metadata) { + t.Fatal("metadata mismatch", or.Object.Metadata) } // get the object from the worker and assert it has the metadata - res, err := w.GetObject(context.Background(), api.DefaultBucketName, t.Name(), api.DownloadObjectOptions{}) + gor, err := w.GetObject(context.Background(), api.DefaultBucketName, t.Name(), api.DownloadObjectOptions{}) if err != nil { t.Fatal(err) } - if !reflect.DeepEqual(res.Metadata, opts.Metadata) { - t.Fatal("metadata mismatch", res.Metadata) + if !reflect.DeepEqual(gor.Metadata, opts.Metadata) { + t.Fatal("metadata mismatch", gor.Metadata) + } + + // perform a HEAD request and assert the headers are all present + hor, err := w.HeadObject(context.Background(), api.DefaultBucketName, t.Name(), api.HeadObjectOptions{Range: api.DownloadRange{Offset: 1, Length: 1}}) + if err != nil { + t.Fatal(err) + } else if !reflect.DeepEqual(hor, &api.HeadObjectResponse{ + ContentType: or.Object.ContentType(), + LastModified: or.Object.LastModified(), + Range: &api.DownloadRange{Offset: 1, Length: 1, Size: int64(len(data))}, + Size: int64(len(data)), + Metadata: gor.Metadata, + }) { + t.Fatalf("unexpected response: %+v", hor) } // re-upload the object @@ -63,11 +78,11 @@ func TestObjectMetadata(t *testing.T) { } // assert metadata was removed - res, err = w.GetObject(context.Background(), api.DefaultBucketName, t.Name(), api.DownloadObjectOptions{}) + gor, err = w.GetObject(context.Background(), api.DefaultBucketName, t.Name(), api.DownloadObjectOptions{}) if err != nil { t.Fatal(err) } - if len(res.Metadata) > 0 { - t.Fatal("unexpected metadata", res.Metadata) + if len(gor.Metadata) > 0 { + t.Fatal("unexpected metadata", gor.Metadata) } } diff --git a/worker/client/client.go b/worker/client/client.go index f45789093..ff8625541 100644 --- a/worker/client/client.go +++ b/worker/client/client.go @@ -77,13 +77,49 @@ func (c *Client) DownloadStats() (resp api.DownloadStatsResponse, err error) { return } +// HeadObject returns the metadata of the object at the given path. +func (c *Client) HeadObject(ctx context.Context, bucket, path string, opts api.HeadObjectOptions) (*api.HeadObjectResponse, error) { + c.c.Custom("HEAD", fmt.Sprintf("/objects/%s", path), nil, nil) + + if strings.HasSuffix(path, "/") { + return nil, errors.New("the given path is a directory, HEAD can only be performed on objects") + } + + values := url.Values{} + values.Set("bucket", url.QueryEscape(bucket)) + path += "?" + values.Encode() + + // TODO: support HEAD in jape client + req, err := http.NewRequestWithContext(ctx, "HEAD", fmt.Sprintf("%s/objects/%s", c.c.BaseURL, path), nil) + if err != nil { + panic(err) + } + req.SetBasicAuth("", c.c.WithContext(ctx).Password) + opts.ApplyHeaders(req.Header) + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 && resp.StatusCode != 206 { + err, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + return nil, errors.New(string(err)) + } + + head, err := api.ParseObjectHeadResponseFrom(resp.Header) + if err != nil { + return nil, err + } + return &head, nil +} + // GetObject returns the object at given path alongside its metadata. func (c *Client) GetObject(ctx context.Context, bucket, path string, opts api.DownloadObjectOptions) (*api.GetObjectResponse, error) { if strings.HasSuffix(path, "/") { return nil, errors.New("the given path is a directory, use ObjectEntries instead") } - // Start download. path = api.ObjectPathEscape(path) body, header, err := c.object(ctx, bucket, path, opts) if err != nil { @@ -96,41 +132,18 @@ func (c *Client) GetObject(ctx context.Context, bucket, path string, opts api.Do } }() - // Parse header. - var size int64 - _, err = fmt.Sscan(header.Get("Content-Length"), &size) + head, err := api.ParseObjectHeadResponseFrom(header) if err != nil { return nil, err } - var r *api.DownloadRange - if cr := header.Get("Content-Range"); cr != "" { - dr, err := api.ParseDownloadRange(cr) - if err != nil { - return nil, err - } - r = &dr - - // If a range is set, the size is the size extracted from the range - // since Content-Length will then only be the length of the returned - // range. - size = dr.Size - } - - // Parse headers. - headers := make(map[string]string) - for k, v := range header { - if len(v) > 0 { - headers[k] = v[0] - } - } return &api.GetObjectResponse{ Content: body, - ContentType: header.Get("Content-Type"), - LastModified: header.Get("Last-Modified"), - Range: r, - Size: size, - Metadata: api.ExtractObjectUserMetadataFrom(headers), + ContentType: head.ContentType, + LastModified: head.LastModified, + Range: head.Range, + Size: head.Size, + Metadata: head.Metadata, }, nil } diff --git a/worker/serve.go b/worker/serve.go index 76c1fb2d5..25d0c0412 100644 --- a/worker/serve.go +++ b/worker/serve.go @@ -76,9 +76,6 @@ func serveContent(rw http.ResponseWriter, req *http.Request, obj api.Object, dow } }() - // create a content reader - rs := newContentReader(pr, obj, offset) - // fetch the content type, if not set and we can't infer it from object's // name we default to application/octet-stream, that is important because we // have to avoid http.ServeContent to sniff the content type as it would @@ -87,17 +84,20 @@ func serveContent(rw http.ResponseWriter, req *http.Request, obj api.Object, dow if contentType == "" { contentType = "application/octet-stream" } + rw.Header().Set("Content-Type", contentType) // set the response headers, no need to set Last-Modified header as // serveContent does that for us rw.Header().Set("ETag", api.FormatETag(obj.ETag)) - rw.Header().Set("Content-Type", contentType) // set the user metadata headers for k, v := range obj.Metadata { rw.Header().Set(fmt.Sprintf("%s%s", api.ObjectMetadataPrefix, k), v) } + // create a content reader + rs := newContentReader(pr, obj, offset) + http.ServeContent(rw, req, obj.Name, obj.ModTime.Std(), rs) return http.StatusOK, nil } diff --git a/worker/worker.go b/worker/worker.go index b335a5f6c..91786c481 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -860,6 +860,46 @@ func (w *worker) uploadsStatsHandlerGET(jc jape.Context) { }) } +func (w *worker) objectsHandlerHEAD(jc jape.Context) { + // parse bucket + bucket := api.DefaultBucketName + if jc.DecodeForm("bucket", &bucket) != nil { + return + } + + // parse path + path := jc.PathParam("path") + if path == "" || strings.HasSuffix(path, "/") { + jc.Error(fmt.Errorf("directories are not accepted"), http.StatusBadRequest) + return + } + + // fetch object metadata + res, err := w.bus.Object(jc.Request.Context(), bucket, path, api.GetObjectOptions{ + OnlyMetadata: true, + }) + if errors.Is(err, api.ErrObjectNotFound) { + jc.Error(err, http.StatusNotFound) + return + } else if err != nil { + jc.Error(err, http.StatusInternalServerError) + return + } else if res.Object == nil { + jc.Error(api.ErrObjectNotFound, http.StatusInternalServerError) // should never happen but checking because we deref. later + return + } + + // serve the content + status, err := serveContent(jc.ResponseWriter, jc.Request, *res.Object, func(io.Writer, int64, int64) error { return nil }) + if errors.Is(err, http_range.ErrInvalid) || errors.Is(err, errMultiRangeNotSupported) { + jc.Error(err, http.StatusBadRequest) + } else if errors.Is(err, http_range.ErrNoOverlap) { + jc.Error(err, http.StatusRequestedRangeNotSatisfiable) + } else if err != nil { + jc.Error(err, status) + } +} + func (w *worker) objectsHandlerGET(jc jape.Context) { jc.Custom(nil, []api.ObjectMetadata{}) @@ -1366,6 +1406,7 @@ func (w *worker) Handler() http.Handler { "GET /stats/uploads": w.uploadsStatsHandlerGET, "POST /slab/migrate": w.slabMigrateHandler, + "HEAD /objects/*path": w.objectsHandlerHEAD, "GET /objects/*path": w.objectsHandlerGET, "PUT /objects/*path": w.objectsHandlerPUT, "DELETE /objects/*path": w.objectsHandlerDELETE, From 906bcc8295ed6f788c2e28c6f4eaf0c2b026654e Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 5 Mar 2024 18:43:35 +0100 Subject: [PATCH 2/3] worker: updat error message --- worker/worker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/worker/worker.go b/worker/worker.go index 91786c481..729c544c7 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -870,7 +870,7 @@ func (w *worker) objectsHandlerHEAD(jc jape.Context) { // parse path path := jc.PathParam("path") if path == "" || strings.HasSuffix(path, "/") { - jc.Error(fmt.Errorf("directories are not accepted"), http.StatusBadRequest) + jc.Error(errors.New("HEAD requests can only be performed on objects, not directories"), http.StatusBadRequest) return } From e9ba938876e020ad5094cd51ae09ba80356a9f40 Mon Sep 17 00:00:00 2001 From: PJ Date: Wed, 6 Mar 2024 16:56:23 +0100 Subject: [PATCH 3/3] worker: implement CR remarks --- api/object.go | 49 ++------------------------------------ worker/client/client.go | 52 ++++++++++++++++++++++++++++++++++------- worker/worker.go | 3 ++- 3 files changed, 48 insertions(+), 56 deletions(-) diff --git a/api/object.go b/api/object.go index f53dbb5ac..4b1993341 100644 --- a/api/object.go +++ b/api/object.go @@ -85,13 +85,8 @@ type ( // GetObjectResponse is the response type for the GET /worker/object endpoint. GetObjectResponse struct { - Content io.ReadCloser `json:"content"` - ContentType string `json:"contentType"` - LastModified string `json:"lastModified"` - Range *DownloadRange `json:"range,omitempty"` - Size int64 `json:"size"` - Metadata ObjectUserMetadata `json:"metadata"` - // NOTE: keep HeadObjectResponse in sync with this type + Content io.ReadCloser `json:"content"` + HeadObjectResponse } // HeadObjectResponse is the response type for the HEAD /worker/object endpoint. @@ -145,46 +140,6 @@ type ( } ) -func ParseObjectHeadResponseFrom(header http.Header) (HeadObjectResponse, error) { - // parse size - var size int64 - _, err := fmt.Sscan(header.Get("Content-Length"), &size) - if err != nil { - return HeadObjectResponse{}, err - } - - // parse range - var r *DownloadRange - if cr := header.Get("Content-Range"); cr != "" { - dr, err := ParseDownloadRange(cr) - if err != nil { - return HeadObjectResponse{}, err - } - r = &dr - - // if a range is set, the size is the size extracted from the range - // since Content-Length will then only be the length of the returned - // range. - size = dr.Size - } - - // parse headers - headers := make(map[string]string) - for k, v := range header { - if len(v) > 0 { - headers[k] = v[0] - } - } - - return HeadObjectResponse{ - ContentType: header.Get("Content-Type"), - LastModified: header.Get("Last-Modified"), - Range: r, - Size: size, - Metadata: ExtractObjectUserMetadataFrom(headers), - }, nil -} - func ExtractObjectUserMetadataFrom(metadata map[string]string) ObjectUserMetadata { oum := make(map[string]string) for k, v := range metadata { diff --git a/worker/client/client.go b/worker/client/client.go index ff8625541..410e4c66e 100644 --- a/worker/client/client.go +++ b/worker/client/client.go @@ -107,7 +107,7 @@ func (c *Client) HeadObject(ctx context.Context, bucket, path string, opts api.H return nil, errors.New(string(err)) } - head, err := api.ParseObjectHeadResponseFrom(resp.Header) + head, err := parseObjectResponseHeaders(resp.Header) if err != nil { return nil, err } @@ -132,18 +132,14 @@ func (c *Client) GetObject(ctx context.Context, bucket, path string, opts api.Do } }() - head, err := api.ParseObjectHeadResponseFrom(header) + head, err := parseObjectResponseHeaders(header) if err != nil { return nil, err } return &api.GetObjectResponse{ - Content: body, - ContentType: head.ContentType, - LastModified: head.LastModified, - Range: head.Range, - Size: head.Size, - Metadata: head.Metadata, + Content: body, + HeadObjectResponse: head, }, nil } @@ -296,6 +292,46 @@ func (c *Client) object(ctx context.Context, bucket, path string, opts api.Downl return resp.Body, resp.Header, err } +func parseObjectResponseHeaders(header http.Header) (api.HeadObjectResponse, error) { + // parse size + var size int64 + _, err := fmt.Sscan(header.Get("Content-Length"), &size) + if err != nil { + return api.HeadObjectResponse{}, err + } + + // parse range + var r *api.DownloadRange + if cr := header.Get("Content-Range"); cr != "" { + dr, err := api.ParseDownloadRange(cr) + if err != nil { + return api.HeadObjectResponse{}, err + } + r = &dr + + // if a range is set, the size is the size extracted from the range + // since Content-Length will then only be the length of the returned + // range. + size = dr.Size + } + + // parse headers + headers := make(map[string]string) + for k, v := range header { + if len(v) > 0 { + headers[k] = v[0] + } + } + + return api.HeadObjectResponse{ + ContentType: header.Get("Content-Type"), + LastModified: header.Get("Last-Modified"), + Range: r, + Size: size, + Metadata: api.ExtractObjectUserMetadataFrom(headers), + }, nil +} + func sizeFromSeeker(r io.Reader) (int64, error) { s, ok := r.(io.Seeker) if !ok { diff --git a/worker/worker.go b/worker/worker.go index 729c544c7..498269bf4 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -889,7 +889,8 @@ func (w *worker) objectsHandlerHEAD(jc jape.Context) { return } - // serve the content + // serve the content to ensure we're setting the exact same headers as we + // would for a GET request status, err := serveContent(jc.ResponseWriter, jc.Request, *res.Object, func(io.Writer, int64, int64) error { return nil }) if errors.Is(err, http_range.ErrInvalid) || errors.Is(err, errMultiRangeNotSupported) { jc.Error(err, http.StatusBadRequest)